Discussion:
[RFC PATCH ghak32 V2 01/13] audit: add container id
(too old to reply)
Richard Guy Briggs
2018-03-16 09:00:28 UTC
Permalink
Raw Message
Implement the proc fs write to set the audit container ID of a process,
emitting an AUDIT_CONTAINER record to document the event.

This is a write from the container orchestrator task to a proc entry of
the form /proc/PID/containerid where PID is the process ID of the newly
created task that is to become the first task in a container, or an
additional task added to a container.

The write expects up to a u64 value (unset: 18446744073709551615).

This will produce a record such as this:
type=CONTAINER msg=audit(1519903238.968:261): op=set pid=596 uid=0 subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 auid=0 tty=pts0 ses=1 opid=596 old-contid=18446744073709551615 contid=123455 res=0

The "op" field indicates an initial set. The "pid" to "ses" fields are
the orchestrator while the "opid" field is the object's PID, the process
being "contained". Old and new container ID values are given in the
"contid" fields, while res indicates its success.

It is not permitted to self-set, unset or re-set the container ID. A
child inherits its parent's container ID, and that inherited value can
then be set only once.

See: https://github.com/linux-audit/audit-kernel/issues/32

Signed-off-by: Richard Guy Briggs <***@redhat.com>
---
fs/proc/base.c | 37 ++++++++++++++++++++
include/linux/audit.h | 16 +++++++++
include/linux/init_task.h | 4 ++-
include/linux/sched.h | 1 +
include/uapi/linux/audit.h | 2 ++
kernel/auditsc.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 143 insertions(+), 1 deletion(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 60316b5..6ce4fbe 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1299,6 +1299,41 @@ static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
.read = proc_sessionid_read,
.llseek = generic_file_llseek,
};
+
+static ssize_t proc_containerid_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode *inode = file_inode(file);
+ u64 containerid;
+ int rv;
+ struct task_struct *task = get_proc_task(inode); /* put_task_struct() on every exit path below */
+
+ if (!task)
+ return -ESRCH;
+ if (*ppos != 0) {
+ /* No partial writes. */
+ put_task_struct(task);
+ return -EINVAL;
+ }
+
+ rv = kstrtou64_from_user(buf, count, 10, &containerid); /* parse user buffer as a base-10 u64 */
+ if (rv < 0) {
+ put_task_struct(task);
+ return rv;
+ }
+
+ rv = audit_set_containerid(task, containerid); /* permission checks and logging happen in there */
+ put_task_struct(task);
+ if (rv < 0)
+ return rv;
+ return count; /* success: report the whole write as consumed */
+}
+
+static const struct file_operations proc_containerid_operations = {
+ .write = proc_containerid_write, /* no .read handler: the entry is registered S_IWUSR only */
+ .llseek = generic_file_llseek,
+};
+
#endif

#ifdef CONFIG_FAULT_INJECTION
@@ -2961,6 +2996,7 @@ static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns,
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
@@ -3355,6 +3391,7 @@ static int proc_tid_comm_permission(struct inode *inode, int mask)
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
diff --git a/include/linux/audit.h b/include/linux/audit.h
index af410d9..fe4ba3f 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -29,6 +29,7 @@

#define AUDIT_INO_UNSET ((unsigned long)-1)
#define AUDIT_DEV_UNSET ((dev_t)-1)
+#define INVALID_CID AUDIT_CID_UNSET

struct audit_sig_info {
uid_t uid;
@@ -321,6 +322,7 @@ static inline void audit_ptrace(struct task_struct *t)
extern int auditsc_get_stamp(struct audit_context *ctx,
struct timespec64 *t, unsigned int *serial);
extern int audit_set_loginuid(kuid_t loginuid);
+extern int audit_set_containerid(struct task_struct *tsk, u64 containerid);

static inline kuid_t audit_get_loginuid(struct task_struct *tsk)
{
@@ -332,6 +334,11 @@ static inline unsigned int audit_get_sessionid(struct task_struct *tsk)
return tsk->sessionid;
}

+static inline u64 audit_get_containerid(struct task_struct *tsk)
+{
+ return tsk->containerid; /* equals INVALID_CID while no container ID is set */
+}
+
extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid, gid_t gid, umode_t mode);
extern void __audit_bprm(struct linux_binprm *bprm);
@@ -517,6 +524,10 @@ static inline unsigned int audit_get_sessionid(struct task_struct *tsk)
{
return -1;
}
+static inline u64 audit_get_containerid(struct task_struct *tsk)
+{
+ return INVALID_CID; /* stub variant: always reports the ID as unset */
+}
static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
{ }
static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t uid,
@@ -581,6 +592,11 @@ static inline bool audit_loginuid_set(struct task_struct *tsk)
return uid_valid(audit_get_loginuid(tsk));
}

+static inline bool audit_containerid_set(struct task_struct *tsk)
+{
+ return audit_get_containerid(tsk) != INVALID_CID; /* INVALID_CID (AUDIT_CID_UNSET) marks "unset" */
+}
+
static inline void audit_log_string(struct audit_buffer *ab, const char *buf)
{
audit_log_n_string(ab, buf, strlen(buf));
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 6a53262..046bd0a 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -18,6 +18,7 @@
#include <linux/sched/rt.h>
#include <linux/livepatch.h>
#include <linux/mm_types.h>
+#include <linux/audit.h>

#include <asm/thread_info.h>

@@ -120,7 +121,8 @@
#ifdef CONFIG_AUDITSYSCALL
#define INIT_IDS \
.loginuid = INVALID_UID, \
- .sessionid = (unsigned int)-1,
+ .sessionid = (unsigned int)-1, \
+ .containerid = INVALID_CID,
#else
#define INIT_IDS
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d258826..1b82191 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -796,6 +796,7 @@ struct task_struct {
#ifdef CONFIG_AUDITSYSCALL
kuid_t loginuid;
unsigned int sessionid;
+ u64 containerid;
#endif
struct seccomp seccomp;

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 4e61a9e..921a71f 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -71,6 +71,7 @@
#define AUDIT_TTY_SET 1017 /* Set TTY auditing status */
#define AUDIT_SET_FEATURE 1018 /* Turn an audit feature on or off */
#define AUDIT_GET_FEATURE 1019 /* Get which features are enabled */
+#define AUDIT_CONTAINER 1020 /* Define the container id and information */

#define AUDIT_FIRST_USER_MSG 1100 /* Userspace messages mostly uninteresting to kernel */
#define AUDIT_USER_AVC 1107 /* We filter this differently */
@@ -465,6 +466,7 @@ struct audit_tty_status {
};

#define AUDIT_UID_UNSET (unsigned int)-1
+#define AUDIT_CID_UNSET ((u64)-1)

/* audit_rule_data supports filter rules with both integer and string
* fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4e0a4ac..29c8482 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2073,6 +2073,90 @@ int audit_set_loginuid(kuid_t loginuid)
return rc;
}

+static int audit_set_containerid_perm(struct task_struct *task, u64 containerid)
+{
+ struct task_struct *parent;
+ u64 pcontainerid, ccontainerid;
+
+ /* Don't allow to set our own containerid */
+ if (current == task)
+ return -EPERM;
+ /* Don't allow the containerid to be unset */
+ if (containerid == INVALID_CID)
+ return -EINVAL;
+ /* if we don't have caps, reject */
+ if (!capable(CAP_AUDIT_CONTROL))
+ return -EPERM;
+ /* if containerid is unset, allow */
+ if (!audit_containerid_set(task))
+ return 0;
+ /* it is already set, and not inherited from the parent, reject */
+ ccontainerid = audit_get_containerid(task);
+ rcu_read_lock(); /* parent must only be dereferenced inside this read-side section */
+ parent = rcu_dereference(task->real_parent);
+ task_lock(parent);
+ pcontainerid = audit_get_containerid(parent);
+ task_unlock(parent);
+ rcu_read_unlock(); /* parent is not touched past this point */
+ if (ccontainerid != pcontainerid)
+ return -EPERM;
+ return 0;
+}
+
+static void audit_log_set_containerid(struct task_struct *task, u64 oldcontainerid,
+ u64 containerid, int rc)
+{
+ struct audit_buffer *ab;
+ uid_t uid;
+ struct tty_struct *tty;
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONTAINER);
+ if (!ab)
+ return;
+
+ uid = from_kuid(&init_user_ns, task_uid(current)); /* pid..ses fields describe current, the writer */
+ tty = audit_get_tty(current);
+
+ audit_log_format(ab, "op=set pid=%d uid=%u", task_tgid_nr(current), uid);
+ audit_log_task_context(ab);
+ audit_log_format(ab, " auid=%u tty=%s ses=%u opid=%d old-contid=%llu contid=%llu res=%d",
+ from_kuid(&init_user_ns, audit_get_loginuid(current)),
+ tty ? tty_name(tty) : "(none)", audit_get_sessionid(current),
+ task_tgid_nr(task), oldcontainerid, containerid, !rc); /* opid = target task; res is 1 when rc == 0, else 0 */
+
+ audit_put_tty(tty); /* pairs with audit_get_tty() above */
+ audit_log_end(ab);
+}
+
+/**
+ * audit_set_containerid - set a task's audit container ID
+ * @task: task whose container ID is to be set
+ * @containerid: containerid value
+ *
+ * Returns 0 on success, -EPERM or -EINVAL if the request is refused.
+ * Called (set) from fs/proc/base.c::proc_containerid_write().
+ */
+int audit_set_containerid(struct task_struct *task, u64 containerid)
+{
+ u64 oldcontainerid;
+ int rc;
+
+ oldcontainerid = audit_get_containerid(task); /* kept for the old-contid field of the record */
+
+ rc = audit_set_containerid_perm(task, containerid);
+ if (!rc) {
+ task_lock(task);
+ task->containerid = containerid;
+ task_unlock(task);
+ }
+
+ audit_log_set_containerid(task, oldcontainerid, containerid, rc); /* logged on success and on refusal */
+ return rc;
+}
+
/**
* __audit_mq_open - record audit data for a POSIX MQ open
* @oflag: open flag
--
1.8.3.1
Richard Guy Briggs
2018-03-16 09:00:29 UTC
Permalink
Raw Message
Check if a task has existing children or co-threads and refuse to set
the container ID if either are present. Failure to check this could
permit games where a child scratches its parent's back to work around
inheritance and double-setting policy.

Signed-off-by: Richard Guy Briggs <***@redhat.com>
---
kernel/auditsc.c | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 29c8482..a6b0a52 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2087,6 +2087,10 @@ static int audit_set_containerid_perm(struct task_struct *task, u64 containerid)
/* if we don't have caps, reject */
if (!capable(CAP_AUDIT_CONTROL))
return -EPERM;
+ /* if task has children or is not single-threaded, deny */
+ if (!list_empty(&task->children) ||
+ !(thread_group_leader(task) && thread_group_empty(task)))
+ return -EPERM;
/* if containerid is unset, allow */
if (!audit_containerid_set(task))
return 0;
--
1.8.3.1
Paul Moore
2018-04-19 00:11:30 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Check if a task has existing children or co-threads and refuse to set
the container ID if either are present. Failure to check this could
permit games where a child scratches its parent's back to work around
inheritance and double-setting policy.
---
kernel/auditsc.c | 4 ++++
1 file changed, 4 insertions(+)
I would just include this in patch 1/2 as I can't think of a world where
we wouldn't want this check.
Post by Richard Guy Briggs
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 29c8482..a6b0a52 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2087,6 +2087,10 @@ static int audit_set_containerid_perm(struct task_struct *task, u64 containerid)
/* if we don't have caps, reject */
if (!capable(CAP_AUDIT_CONTROL))
return -EPERM;
+ /* if task has children or is not single-threaded, deny */
+ if (!list_empty(&task->children) ||
+ !(thread_group_leader(task) && thread_group_empty(task)))
+ return -EPERM;
/* if containerid is unset, allow */
if (!audit_containerid_set(task))
return 0;
--
1.8.3.1
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-03-16 09:00:30 UTC
Permalink
Raw Message
Create a new audit record AUDIT_CONTAINER_INFO to document the container
ID of a process if it is present.

Called from audit_log_exit(), syscalls are covered.

A sample raw event:
type=SYSCALL msg=audit(1519924845.499:257): arch=c000003e syscall=257 success=yes exit=3 a0=ffffff9c a1=56374e1cef30 a2=241 a3=1b6 items=2 ppid=606 pid=635 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=pts0 ses=3 comm="bash" exe="/usr/bin/bash" subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 key="tmpcontainerid"
type=CWD msg=audit(1519924845.499:257): cwd="/root"
type=PATH msg=audit(1519924845.499:257): item=0 name="/tmp/" inode=13863 dev=00:27 mode=041777 ouid=0 ogid=0 rdev=00:00 obj=system_u:object_r:tmp_t:s0 nametype= PARENT cap_fp=0000000000000000 cap_fi=0000000000000000 cap_fe=0 cap_fver=0
type=PATH msg=audit(1519924845.499:257): item=1 name="/tmp/tmpcontainerid" inode=17729 dev=00:27 mode=0100644 ouid=0 ogid=0 rdev=00:00 obj=unconfined_u:object_r:user_tmp_t:s0 nametype=CREATE cap_fp=0000000000000000 cap_fi=0000000000000000 cap_fe=0 cap_fver=0
type=PROCTITLE msg=audit(1519924845.499:257): proctitle=62617368002D6300736C65657020313B206563686F2074657374203E202F746D702F746D70636F6E7461696E65726964
type=CONTAINER_INFO msg=audit(1519924845.499:257): op=task contid=123458

See: https://github.com/linux-audit/audit-kernel/issues/32
Signed-off-by: Richard Guy Briggs <***@redhat.com>
---
include/linux/audit.h | 5 +++++
include/uapi/linux/audit.h | 1 +
kernel/audit.c | 20 ++++++++++++++++++++
kernel/auditsc.c | 2 ++
4 files changed, 28 insertions(+)

diff --git a/include/linux/audit.h b/include/linux/audit.h
index fe4ba3f..3acbe9d 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -154,6 +154,8 @@ extern void audit_log_link_denied(const char *operation,
extern int audit_log_task_context(struct audit_buffer *ab);
extern void audit_log_task_info(struct audit_buffer *ab,
struct task_struct *tsk);
+extern int audit_log_container_info(struct task_struct *tsk,
+ struct audit_context *context);

extern int audit_update_lsm_rules(void);

@@ -205,6 +207,9 @@ static inline int audit_log_task_context(struct audit_buffer *ab)
static inline void audit_log_task_info(struct audit_buffer *ab,
struct task_struct *tsk)
{ }
+static inline int audit_log_container_info(struct task_struct *tsk,
+ struct audit_context *context)
+{ return 0; }
#define audit_enabled 0
#endif /* CONFIG_AUDIT */

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 921a71f..e83ccbd 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -115,6 +115,7 @@
#define AUDIT_REPLACE 1329 /* Replace auditd if this packet unanswerd */
#define AUDIT_KERN_MODULE 1330 /* Kernel Module events */
#define AUDIT_FANOTIFY 1331 /* Fanotify access decision */
+#define AUDIT_CONTAINER_INFO 1332 /* Container ID information */

#define AUDIT_AVC 1400 /* SE Linux avc denial or grant */
#define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */
diff --git a/kernel/audit.c b/kernel/audit.c
index 3f2f143..a12f21f 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -2049,6 +2049,26 @@ void audit_log_session_info(struct audit_buffer *ab)
audit_log_format(ab, " auid=%u ses=%u", auid, sessionid);
}

+/**
+ * audit_log_container_info - report container info
+ * @tsk: task to be recorded (no record is emitted if its container ID is unset)
+ * @context: task or local context for record
+ */
+int audit_log_container_info(struct task_struct *tsk, struct audit_context *context)
+{
+ struct audit_buffer *ab;
+
+ if (!audit_containerid_set(tsk))
+ return 0; /* nothing to report; not treated as an error */
+ /* Generate AUDIT_CONTAINER_INFO with container ID */
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONTAINER_INFO);
+ if (!ab)
+ return -ENOMEM; /* audit_log_start() handed back no buffer */
+ audit_log_format(ab, "contid=%llu", audit_get_containerid(tsk));
+ audit_log_end(ab);
+ return 0;
+}
+
void audit_log_key(struct audit_buffer *ab, char *key)
{
audit_log_format(ab, " key=");
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index a6b0a52..65be110 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1453,6 +1453,8 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts

audit_log_proctitle(tsk, context);

+ audit_log_container_info(tsk, context);
+
/* Send end of event record to help user space know we are finished */
ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE);
if (ab)
--
1.8.3.1
Richard Guy Briggs
2018-05-17 21:41:02 UTC
Permalink
Raw Message
On Fri, 16 Mar 2018 05:00:30 -0400
Post by Richard Guy Briggs
Create a new audit record AUDIT_CONTAINER_INFO to document the
container ID of a process if it is present.
As mentioned in a previous email, I think AUDIT_CONTAINER is more
suitable for the container record. One more comment below...
Post by Richard Guy Briggs
Called from audit_log_exit(), syscalls are covered.
type=SYSCALL msg=audit(1519924845.499:257): arch=c000003e syscall=257
success=yes exit=3 a0=ffffff9c a1=56374e1cef30 a2=241 a3=1b6 items=2
ppid=606 pid=635 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0
sgid=0 fsgid=0 tty=pts0 ses=3 comm="bash" exe="/usr/bin/bash"
subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023
cwd="/root" type=PATH msg=audit(1519924845.499:257): item=0
name="/tmp/" inode=13863 dev=00:27 mode=041777 ouid=0 ogid=0
rdev=00:00 obj=system_u:object_r:tmp_t:s0 nametype= PARENT
cap_fp=0000000000000000 cap_fi=0000000000000000 cap_fe=0 cap_fver=0
type=PATH msg=audit(1519924845.499:257): item=1
name="/tmp/tmpcontainerid" inode=17729 dev=00:27 mode=0100644 ouid=0
ogid=0 rdev=00:00 obj=unconfined_u:object_r:user_tmp_t:s0
nametype=CREATE cap_fp=0000000000000000 cap_fi=0000000000000000
proctitle=62617368002D6300736C65657020313B206563686F2074657374203E202F746D702F746D70636F6E7461696E65726964
type=CONTAINER_INFO msg=audit(1519924845.499:257): op=task
contid=123458
See: https://github.com/linux-audit/audit-kernel/issues/32
---
include/linux/audit.h | 5 +++++
include/uapi/linux/audit.h | 1 +
kernel/audit.c | 20 ++++++++++++++++++++
kernel/auditsc.c | 2 ++
4 files changed, 28 insertions(+)
diff --git a/include/linux/audit.h b/include/linux/audit.h
index fe4ba3f..3acbe9d 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -154,6 +154,8 @@ extern void
audit_log_link_denied(const char *operation, extern int
audit_log_task_context(struct audit_buffer *ab); extern void
audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk);
+extern int audit_log_container_info(struct task_struct *tsk,
+ struct audit_context *context);
extern int audit_update_lsm_rules(void);
@@ -205,6 +207,9 @@ static inline int audit_log_task_context(struct
audit_buffer *ab) static inline void audit_log_task_info(struct
audit_buffer *ab, struct task_struct *tsk)
{ }
+static inline int audit_log_container_info(struct task_struct *tsk,
+ struct audit_context
*context); +{ }
#define audit_enabled 0
#endif /* CONFIG_AUDIT */
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 921a71f..e83ccbd 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -115,6 +115,7 @@
#define AUDIT_REPLACE 1329 /* Replace auditd
if this packet unanswerd */ #define AUDIT_KERN_MODULE
1330 /* Kernel Module events */ #define
AUDIT_FANOTIFY 1331 /* Fanotify access decision
*/ +#define AUDIT_CONTAINER_INFO 1332 /* Container ID
information */ #define AUDIT_AVC 1400 /* SE
Linux avc denial or grant */ #define AUDIT_SELINUX_ERR
1401 /* Internal SE Linux Errors */ diff --git
a/kernel/audit.c b/kernel/audit.c index 3f2f143..a12f21f 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -2049,6 +2049,26 @@ void audit_log_session_info(struct
audit_buffer *ab) audit_log_format(ab, " auid=%u ses=%u", auid,
sessionid); }
+/*
+ * audit_log_container_info - report container info
+ */
+int audit_log_container_info(struct task_struct *tsk, struct
audit_context *context) +{
+ struct audit_buffer *ab;
+
+ if (!audit_containerid_set(tsk))
+ return 0;
+ /* Generate AUDIT_CONTAINER_INFO with container ID */
+ ab = audit_log_start(context, GFP_KERNEL,
AUDIT_CONTAINER_INFO);
+ if (!ab)
+ return -ENOMEM;
+ audit_log_format(ab, "contid=%llu",
audit_get_containerid(tsk));
+ audit_log_end(ab);
+ return 0;
+}
+
void audit_log_key(struct audit_buffer *ab, char *key)
{
audit_log_format(ab, " key=");
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index a6b0a52..65be110 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1453,6 +1453,8 @@ static void audit_log_exit(struct audit_context
*context, struct task_struct *ts
audit_log_proctitle(tsk, context);
+ audit_log_container_info(tsk, context);
Would there be any problem moving audit_log_container_info before
audit_log_proctitle? There are some assumptions that proctitle is the
last record in some situations.
I see no problem doing that.
Thanks,
-Steve
Post by Richard Guy Briggs
/* Send end of event record to help user space know we are
finished */ ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE);
if (ab)
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Steve Grubb
2018-05-21 19:19:10 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
On Fri, 16 Mar 2018 05:00:30 -0400
Post by Richard Guy Briggs
Create a new audit record AUDIT_CONTAINER_INFO to document the
container ID of a process if it is present.
As mentioned in a previous email, I think AUDIT_CONTAINER is more
suitable for the container record. One more comment below...
Post by Richard Guy Briggs
Called from audit_log_exit(), syscalls are covered.
type=SYSCALL msg=audit(1519924845.499:257): arch=c000003e syscall=257
success=yes exit=3 a0=ffffff9c a1=56374e1cef30 a2=241 a3=1b6 items=2
ppid=606 pid=635 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0
sgid=0 fsgid=0 tty=pts0 ses=3 comm="bash" exe="/usr/bin/bash"
subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023
cwd="/root" type=PATH msg=audit(1519924845.499:257): item=0
name="/tmp/" inode=13863 dev=00:27 mode=041777 ouid=0 ogid=0
rdev=00:00 obj=system_u:object_r:tmp_t:s0 nametype= PARENT
cap_fp=0000000000000000 cap_fi=0000000000000000 cap_fe=0 cap_fver=0
type=PATH msg=audit(1519924845.499:257): item=1
name="/tmp/tmpcontainerid" inode=17729 dev=00:27 mode=0100644 ouid=0
ogid=0 rdev=00:00 obj=unconfined_u:object_r:user_tmp_t:s0
nametype=CREATE cap_fp=0000000000000000 cap_fi=0000000000000000
proctitle=62617368002D6300736C65657020313B206563686F2074657374203E202F7
46D702F746D70636F6E7461696E65726964 type=CONTAINER_INFO
msg=audit(1519924845.499:257): op=task
contid=123458
See: https://github.com/linux-audit/audit-kernel/issues/32
---
include/linux/audit.h | 5 +++++
include/uapi/linux/audit.h | 1 +
kernel/audit.c | 20 ++++++++++++++++++++
kernel/auditsc.c | 2 ++
4 files changed, 28 insertions(+)
diff --git a/include/linux/audit.h b/include/linux/audit.h
index fe4ba3f..3acbe9d 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -154,6 +154,8 @@ extern void
audit_log_link_denied(const char *operation, extern int
audit_log_task_context(struct audit_buffer *ab); extern void
audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk);
+extern int audit_log_container_info(struct task_struct *tsk,
+ struct audit_context *context);
extern int audit_update_lsm_rules(void);
@@ -205,6 +207,9 @@ static inline int audit_log_task_context(struct
audit_buffer *ab) static inline void audit_log_task_info(struct
audit_buffer *ab, struct task_struct *tsk)
{ }
+static inline int audit_log_container_info(struct task_struct *tsk,
+ struct audit_context
*context); +{ }
#define audit_enabled 0
#endif /* CONFIG_AUDIT */
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 921a71f..e83ccbd 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -115,6 +115,7 @@
#define AUDIT_REPLACE 1329 /* Replace auditd
if this packet unanswerd */ #define AUDIT_KERN_MODULE
1330 /* Kernel Module events */ #define
AUDIT_FANOTIFY 1331 /* Fanotify access decision
*/ +#define AUDIT_CONTAINER_INFO 1332 /* Container ID
information */ #define AUDIT_AVC 1400 /* SE
Linux avc denial or grant */ #define AUDIT_SELINUX_ERR
1401 /* Internal SE Linux Errors */ diff --git
a/kernel/audit.c b/kernel/audit.c index 3f2f143..a12f21f 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -2049,6 +2049,26 @@ void audit_log_session_info(struct
audit_buffer *ab) audit_log_format(ab, " auid=%u ses=%u", auid,
sessionid); }
+/*
+ * audit_log_container_info - report container info
+ */
+int audit_log_container_info(struct task_struct *tsk, struct
audit_context *context) +{
+ struct audit_buffer *ab;
+
+ if (!audit_containerid_set(tsk))
+ return 0;
+ /* Generate AUDIT_CONTAINER_INFO with container ID */
+ ab = audit_log_start(context, GFP_KERNEL,
AUDIT_CONTAINER_INFO);
+ if (!ab)
+ return -ENOMEM;
+ audit_log_format(ab, "contid=%llu",
audit_get_containerid(tsk));
+ audit_log_end(ab);
+ return 0;
+}
+
void audit_log_key(struct audit_buffer *ab, char *key)
{
audit_log_format(ab, " key=");
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index a6b0a52..65be110 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -1453,6 +1453,8 @@ static void audit_log_exit(struct audit_context
*context, struct task_struct *ts
audit_log_proctitle(tsk, context);
+ audit_log_container_info(tsk, context);
Would there be any problem moving audit_log_container_info before
audit_log_proctitle? There are some assumptions that proctitle is the
last record in some situations.
I see no problem doing that.
Actually...just leave it as is. I have to fix things for simple events and
they do not have a proctitle record. So, leave it as you intended and I'll
work around this on my end.

Thanks,
-Steve
Post by Richard Guy Briggs
Post by Richard Guy Briggs
/* Send end of event record to help user space know we are
finished */ ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE);
if (ab)
- RGB
--
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Richard Guy Briggs
2018-03-16 09:00:31 UTC
Permalink
Raw Message
Implement container ID filtering using the AUDIT_CONTAINERID field name
to send an 8-character string representing a u64 since the value field
is only u32.

Sending it as two u32 was considered, but gathering and comparing two
fields was more complex.

The feature indicator is AUDIT_FEATURE_BITMAP_CONTAINERID_FILTER.

This requires support from userspace to be useful.
See: https://github.com/linux-audit/audit-userspace/issues/40
Signed-off-by: Richard Guy Briggs <***@redhat.com>
---
include/linux/audit.h | 1 +
include/uapi/linux/audit.h | 5 ++++-
kernel/audit.h | 1 +
kernel/auditfilter.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++
kernel/auditsc.c | 3 +++
5 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 3acbe9d..f10ca1b 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -76,6 +76,7 @@ struct audit_field {
u32 type;
union {
u32 val;
+ u64 val64;
kuid_t uid;
kgid_t gid;
struct {
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index e83ccbd..8443a8f 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -262,6 +262,7 @@
#define AUDIT_LOGINUID_SET 24
#define AUDIT_SESSIONID 25 /* Session ID */
#define AUDIT_FSTYPE 26 /* FileSystem Type */
+#define AUDIT_CONTAINERID 27 /* Container ID */

/* These are ONLY useful when checking
* at syscall exit time (AUDIT_AT_EXIT). */
@@ -342,6 +343,7 @@ enum {
#define AUDIT_FEATURE_BITMAP_SESSIONID_FILTER 0x00000010
#define AUDIT_FEATURE_BITMAP_LOST_RESET 0x00000020
#define AUDIT_FEATURE_BITMAP_FILTER_FS 0x00000040
+#define AUDIT_FEATURE_BITMAP_CONTAINERID_FILTER 0x00000080

#define AUDIT_FEATURE_BITMAP_ALL (AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT | \
AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME | \
@@ -349,7 +351,8 @@ enum {
AUDIT_FEATURE_BITMAP_EXCLUDE_EXTEND | \
AUDIT_FEATURE_BITMAP_SESSIONID_FILTER | \
AUDIT_FEATURE_BITMAP_LOST_RESET | \
- AUDIT_FEATURE_BITMAP_FILTER_FS)
+ AUDIT_FEATURE_BITMAP_FILTER_FS | \
+ AUDIT_FEATURE_BITMAP_CONTAINERID_FILTER)

/* deprecated: AUDIT_VERSION_* */
#define AUDIT_VERSION_LATEST AUDIT_FEATURE_BITMAP_ALL
diff --git a/kernel/audit.h b/kernel/audit.h
index 214e149..aaa651a 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -234,6 +234,7 @@ static inline int audit_hash_ino(u32 ino)

extern int audit_match_class(int class, unsigned syscall);
extern int audit_comparator(const u32 left, const u32 op, const u32 right);
+extern int audit_comparator64(const u64 left, const u32 op, const u64 right);
extern int audit_uid_comparator(kuid_t left, u32 op, kuid_t right);
extern int audit_gid_comparator(kgid_t left, u32 op, kgid_t right);
extern int parent_len(const char *path);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index d7a807e..c4c8746 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -410,6 +410,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
/* FALL THROUGH */
case AUDIT_ARCH:
case AUDIT_FSTYPE:
+ case AUDIT_CONTAINERID:
if (f->op != Audit_not_equal && f->op != Audit_equal)
return -EINVAL;
break;
@@ -584,6 +585,14 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
}
entry->rule.exe = audit_mark;
break;
+ case AUDIT_CONTAINERID:
+ if (f->val != sizeof(u64))
+ goto exit_free;
+ str = audit_unpack_string(&bufp, &remain, f->val);
+ if (IS_ERR(str))
+ goto exit_free;
+ f->val64 = ((u64 *)str)[0];
+ break;
}
}

@@ -666,6 +675,11 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
data->buflen += data->values[i] =
audit_pack_string(&bufp, audit_mark_path(krule->exe));
break;
+ case AUDIT_CONTAINERID:
+ data->buflen += data->values[i] = sizeof(u64);
+ memcpy(bufp, &f->val64, sizeof(u64)); /* no byte loop on i: i indexes fields[]/values[] here */
+ bufp = (char *)bufp + sizeof(u64); /* step past the 8 bytes so later packed values do not overwrite them */
+ break;
case AUDIT_LOGINUID_SET:
if (krule->pflags & AUDIT_LOGINUID_LEGACY && !f->val) {
data->fields[i] = AUDIT_LOGINUID;
@@ -752,6 +766,10 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
if (!gid_eq(a->fields[i].gid, b->fields[i].gid))
return 1;
break;
+ case AUDIT_CONTAINERID:
+ if (a->fields[i].val64 != b->fields[i].val64)
+ return 1;
+ break;
default:
if (a->fields[i].val != b->fields[i].val)
return 1;
@@ -1210,6 +1228,31 @@ int audit_comparator(u32 left, u32 op, u32 right)
}
}

+int audit_comparator64(u64 left, u32 op, u64 right)
+{
+ switch (op) {
+ case Audit_equal:
+ return (left == right);
+ case Audit_not_equal:
+ return (left != right);
+ case Audit_lt:
+ return (left < right);
+ case Audit_le:
+ return (left <= right);
+ case Audit_gt:
+ return (left > right);
+ case Audit_ge:
+ return (left >= right);
+ case Audit_bitmask:
+ return !!(left & right); /* collapse to 0/1: a u64 with only high bits set would truncate to 0 as int */
+ case Audit_bittest:
+ return ((left & right) == right);
+ default:
+ BUG();
+ return 0;
+ }
+}
+
int audit_uid_comparator(kuid_t left, u32 op, kuid_t right)
{
switch (op) {
@@ -1348,6 +1391,10 @@ int audit_filter(int msgtype, unsigned int listtype)
result = audit_comparator(audit_loginuid_set(current),
f->op, f->val);
break;
+ case AUDIT_CONTAINERID:
+ result = audit_comparator64(audit_get_containerid(current),
+ f->op, f->val64);
+ break;
case AUDIT_MSGTYPE:
result = audit_comparator(msgtype, f->op, f->val);
break;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 65be110..2bba324 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -614,6 +614,9 @@ static int audit_filter_rules(struct task_struct *tsk,
case AUDIT_LOGINUID_SET:
result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val);
break;
+ case AUDIT_CONTAINERID:
+ result = audit_comparator64(audit_get_containerid(tsk), f->op, f->val64);
+ break;
case AUDIT_SUBJ_USER:
case AUDIT_SUBJ_ROLE:
case AUDIT_SUBJ_TYPE:
--
1.8.3.1
Paul Moore
2018-04-19 00:24:27 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Implement container ID filtering using the AUDIT_CONTAINERID field name
to send an 8-character string representing a u64 since the value field
is only u32.
Sending it as two u32 was considered, but gathering and comparing two
fields was more complex.
My only worry here is that you aren't really sending a string in the
ASCII sense, you are sending an 8 byte buffer (that better be NUL
terminated) that happens to be an unsigned 64-bit integer. To be
clear, I'm okay with that (it's protected by AUDIT_CONTAINERID), and
the code is okay with that, I just want us to pause for a minute and
make sure that is an okay thing to do long term.
Post by Richard Guy Briggs
The feature indicator is AUDIT_FEATURE_BITMAP_CONTAINERID_FILTER.
This requires support from userspace to be useful.
See: https://github.com/linux-audit/audit-userspace/issues/40
---
include/linux/audit.h | 1 +
include/uapi/linux/audit.h | 5 ++++-
kernel/audit.h | 1 +
kernel/auditfilter.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++
kernel/auditsc.c | 3 +++
5 files changed, 56 insertions(+), 1 deletion(-)
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 3acbe9d..f10ca1b 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -76,6 +76,7 @@ struct audit_field {
u32 type;
union {
u32 val;
+ u64 val64;
kuid_t uid;
kgid_t gid;
struct {
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index e83ccbd..8443a8f 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -262,6 +262,7 @@
#define AUDIT_LOGINUID_SET 24
#define AUDIT_SESSIONID 25 /* Session ID */
#define AUDIT_FSTYPE 26 /* FileSystem Type */
+#define AUDIT_CONTAINERID 27 /* Container ID */
/* These are ONLY useful when checking
* at syscall exit time (AUDIT_AT_EXIT). */
@@ -342,6 +343,7 @@ enum {
#define AUDIT_FEATURE_BITMAP_SESSIONID_FILTER 0x00000010
#define AUDIT_FEATURE_BITMAP_LOST_RESET 0x00000020
#define AUDIT_FEATURE_BITMAP_FILTER_FS 0x00000040
+#define AUDIT_FEATURE_BITMAP_CONTAINERID_FILTER 0x00000080
#define AUDIT_FEATURE_BITMAP_ALL (AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT | \
AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME | \
@@ -349,7 +351,8 @@ enum {
AUDIT_FEATURE_BITMAP_EXCLUDE_EXTEND | \
AUDIT_FEATURE_BITMAP_SESSIONID_FILTER | \
AUDIT_FEATURE_BITMAP_LOST_RESET | \
- AUDIT_FEATURE_BITMAP_FILTER_FS)
+ AUDIT_FEATURE_BITMAP_FILTER_FS | \
+ AUDIT_FEATURE_BITMAP_CONTAINERID_FILTER)
/* deprecated: AUDIT_VERSION_* */
#define AUDIT_VERSION_LATEST AUDIT_FEATURE_BITMAP_ALL
diff --git a/kernel/audit.h b/kernel/audit.h
index 214e149..aaa651a 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -234,6 +234,7 @@ static inline int audit_hash_ino(u32 ino)
extern int audit_match_class(int class, unsigned syscall);
extern int audit_comparator(const u32 left, const u32 op, const u32 right);
+extern int audit_comparator64(const u64 left, const u32 op, const u64 right);
extern int audit_uid_comparator(kuid_t left, u32 op, kuid_t right);
extern int audit_gid_comparator(kgid_t left, u32 op, kgid_t right);
extern int parent_len(const char *path);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index d7a807e..c4c8746 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -410,6 +410,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
/* FALL THROUGH */
if (f->op != Audit_not_equal && f->op != Audit_equal)
return -EINVAL;
break;
@@ -584,6 +585,14 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
}
entry->rule.exe = audit_mark;
break;
+ if (f->val != sizeof(u64))
+ goto exit_free;
+ str = audit_unpack_string(&bufp, &remain, f->val);
+ if (IS_ERR(str))
+ goto exit_free;
+ f->val64 = ((u64 *)str)[0];
+ break;
}
}
@@ -666,6 +675,11 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
data->buflen += data->values[i] =
audit_pack_string(&bufp, audit_mark_path(krule->exe));
break;
+ data->buflen += data->values[i] = sizeof(u64);
+ for (i = 0; i < sizeof(u64); i++)
+ ((char *)bufp)[i] = ((char *)&f->val64)[i];
+ break;
if (krule->pflags & AUDIT_LOGINUID_LEGACY && !f->val) {
data->fields[i] = AUDIT_LOGINUID;
@@ -752,6 +766,10 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
if (!gid_eq(a->fields[i].gid, b->fields[i].gid))
return 1;
break;
+ if (a->fields[i].val64 != b->fields[i].val64)
+ return 1;
+ break;
if (a->fields[i].val != b->fields[i].val)
return 1;
@@ -1210,6 +1228,31 @@ int audit_comparator(u32 left, u32 op, u32 right)
}
}
+int audit_comparator64(u64 left, u32 op, u64 right)
+{
+ switch (op) {
+ return (left == right);
+ return (left != right);
+ return (left < right);
+ return (left <= right);
+ return (left > right);
+ return (left >= right);
+ return (left & right);
+ return ((left & right) == right);
+ BUG();
+ return 0;
+ }
+}
+
int audit_uid_comparator(kuid_t left, u32 op, kuid_t right)
{
switch (op) {
@@ -1348,6 +1391,10 @@ int audit_filter(int msgtype, unsigned int listtype)
result = audit_comparator(audit_loginuid_set(current),
f->op, f->val);
break;
+ result = audit_comparator64(audit_get_containerid(current),
+ f->op, f->val64);
+ break;
result = audit_comparator(msgtype, f->op, f->val);
break;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 65be110..2bba324 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -614,6 +614,9 @@ static int audit_filter_rules(struct task_struct *tsk,
result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val);
break;
+ result = audit_comparator64(audit_get_containerid(tsk), f->op, f->val64);
+ break;
--
1.8.3.1
--
Linux-audit mailing list
https://www.redhat.com/mailman/listinfo/linux-audit
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-04-19 12:17:38 UTC
Permalink
Raw Message
Post by Paul Moore
Post by Richard Guy Briggs
Implement container ID filtering using the AUDIT_CONTAINERID field name
to send an 8-character string representing a u64 since the value field
is only u32.
Sending it as two u32 was considered, but gathering and comparing two
fields was more complex.
My only worry here is that you aren't really sending a string in the
ASCII sense, you are sending an 8 byte buffer (that better be NUL
terminated) that happens to be an unsigned 64-bit integer. To be
clear, I'm okay with that (it's protected by AUDIT_CONTAINERID), and
the code is okay with that, I just want us to pause for a minute and
make sure that is an okay thing to do long term.
I already went through that process and warned of it 7 weeks ago. As
already noted, that was preferable to two separate u32 fields that
depend on each other, making comparisons more complicated. Using two
separate fields to configure the rule could be gated for validity, then
the result stored in a special rule field, but I wasn't keen about that
approach.
Post by Paul Moore
Post by Richard Guy Briggs
The feature indicator is AUDIT_FEATURE_BITMAP_CONTAINERID_FILTER.
This requires support from userspace to be useful.
See: https://github.com/linux-audit/audit-userspace/issues/40
---
include/linux/audit.h | 1 +
include/uapi/linux/audit.h | 5 ++++-
kernel/audit.h | 1 +
kernel/auditfilter.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++
kernel/auditsc.c | 3 +++
5 files changed, 56 insertions(+), 1 deletion(-)
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 3acbe9d..f10ca1b 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -76,6 +76,7 @@ struct audit_field {
u32 type;
union {
u32 val;
+ u64 val64;
kuid_t uid;
kgid_t gid;
struct {
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index e83ccbd..8443a8f 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -262,6 +262,7 @@
#define AUDIT_LOGINUID_SET 24
#define AUDIT_SESSIONID 25 /* Session ID */
#define AUDIT_FSTYPE 26 /* FileSystem Type */
+#define AUDIT_CONTAINERID 27 /* Container ID */
/* These are ONLY useful when checking
* at syscall exit time (AUDIT_AT_EXIT). */
@@ -342,6 +343,7 @@ enum {
#define AUDIT_FEATURE_BITMAP_SESSIONID_FILTER 0x00000010
#define AUDIT_FEATURE_BITMAP_LOST_RESET 0x00000020
#define AUDIT_FEATURE_BITMAP_FILTER_FS 0x00000040
+#define AUDIT_FEATURE_BITMAP_CONTAINERID_FILTER 0x00000080
#define AUDIT_FEATURE_BITMAP_ALL (AUDIT_FEATURE_BITMAP_BACKLOG_LIMIT | \
AUDIT_FEATURE_BITMAP_BACKLOG_WAIT_TIME | \
@@ -349,7 +351,8 @@ enum {
AUDIT_FEATURE_BITMAP_EXCLUDE_EXTEND | \
AUDIT_FEATURE_BITMAP_SESSIONID_FILTER | \
AUDIT_FEATURE_BITMAP_LOST_RESET | \
- AUDIT_FEATURE_BITMAP_FILTER_FS)
+ AUDIT_FEATURE_BITMAP_FILTER_FS | \
+ AUDIT_FEATURE_BITMAP_CONTAINERID_FILTER)
/* deprecated: AUDIT_VERSION_* */
#define AUDIT_VERSION_LATEST AUDIT_FEATURE_BITMAP_ALL
diff --git a/kernel/audit.h b/kernel/audit.h
index 214e149..aaa651a 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -234,6 +234,7 @@ static inline int audit_hash_ino(u32 ino)
extern int audit_match_class(int class, unsigned syscall);
extern int audit_comparator(const u32 left, const u32 op, const u32 right);
+extern int audit_comparator64(const u64 left, const u32 op, const u64 right);
extern int audit_uid_comparator(kuid_t left, u32 op, kuid_t right);
extern int audit_gid_comparator(kgid_t left, u32 op, kgid_t right);
extern int parent_len(const char *path);
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index d7a807e..c4c8746 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -410,6 +410,7 @@ static int audit_field_valid(struct audit_entry *entry, struct audit_field *f)
/* FALL THROUGH */
if (f->op != Audit_not_equal && f->op != Audit_equal)
return -EINVAL;
break;
@@ -584,6 +585,14 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
}
entry->rule.exe = audit_mark;
break;
+ if (f->val != sizeof(u64))
+ goto exit_free;
+ str = audit_unpack_string(&bufp, &remain, f->val);
+ if (IS_ERR(str))
+ goto exit_free;
+ f->val64 = ((u64 *)str)[0];
+ break;
}
}
@@ -666,6 +675,11 @@ static struct audit_rule_data *audit_krule_to_data(struct audit_krule *krule)
data->buflen += data->values[i] =
audit_pack_string(&bufp, audit_mark_path(krule->exe));
break;
+ data->buflen += data->values[i] = sizeof(u64);
+ for (i = 0; i < sizeof(u64); i++)
+ ((char *)bufp)[i] = ((char *)&f->val64)[i];
+ break;
if (krule->pflags & AUDIT_LOGINUID_LEGACY && !f->val) {
data->fields[i] = AUDIT_LOGINUID;
@@ -752,6 +766,10 @@ static int audit_compare_rule(struct audit_krule *a, struct audit_krule *b)
if (!gid_eq(a->fields[i].gid, b->fields[i].gid))
return 1;
break;
+ if (a->fields[i].val64 != b->fields[i].val64)
+ return 1;
+ break;
if (a->fields[i].val != b->fields[i].val)
return 1;
@@ -1210,6 +1228,31 @@ int audit_comparator(u32 left, u32 op, u32 right)
}
}
+int audit_comparator64(u64 left, u32 op, u64 right)
+{
+ switch (op) {
+ return (left == right);
+ return (left != right);
+ return (left < right);
+ return (left <= right);
+ return (left > right);
+ return (left >= right);
+ return (left & right);
+ return ((left & right) == right);
+ BUG();
+ return 0;
+ }
+}
+
int audit_uid_comparator(kuid_t left, u32 op, kuid_t right)
{
switch (op) {
@@ -1348,6 +1391,10 @@ int audit_filter(int msgtype, unsigned int listtype)
result = audit_comparator(audit_loginuid_set(current),
f->op, f->val);
break;
+ result = audit_comparator64(audit_get_containerid(current),
+ f->op, f->val64);
+ break;
result = audit_comparator(msgtype, f->op, f->val);
break;
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 65be110..2bba324 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -614,6 +614,9 @@ static int audit_filter_rules(struct task_struct *tsk,
result = audit_comparator(audit_loginuid_set(tsk), f->op, f->val);
break;
+ result = audit_comparator64(audit_get_containerid(tsk), f->op, f->val64);
+ break;
--
1.8.3.1
--
Linux-audit mailing list
https://www.redhat.com/mailman/listinfo/linux-audit
--
paul moore
www.paul-moore.com
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Richard Guy Briggs
2018-03-16 09:00:35 UTC
Permalink
Raw Message
Add container ID auxiliary record to tty logging rule event standalone
records.

Signed-off-by: Richard Guy Briggs <***@redhat.com>
---
drivers/tty/tty_audit.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/tty_audit.c b/drivers/tty/tty_audit.c
index e30aa6b..48ee4b7 100644
--- a/drivers/tty/tty_audit.c
+++ b/drivers/tty/tty_audit.c
@@ -66,8 +66,9 @@ static void tty_audit_log(const char *description, dev_t dev,
uid_t uid = from_kuid(&init_user_ns, task_uid(tsk));
uid_t loginuid = from_kuid(&init_user_ns, audit_get_loginuid(tsk));
unsigned int sessionid = audit_get_sessionid(tsk);
+ struct audit_context *context = audit_alloc_local();

- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_TTY);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_TTY);
if (ab) {
char name[sizeof(tsk->comm)];

@@ -80,6 +81,8 @@ static void tty_audit_log(const char *description, dev_t dev,
audit_log_n_hex(ab, data, size);
audit_log_end(ab);
}
+ audit_log_container_info(context, "tty", audit_get_containerid(tsk));
+ audit_free_context(context);
}

/**
--
1.8.3.1
Richard Guy Briggs
2018-03-16 09:00:36 UTC
Permalink
Raw Message
Add container ID auxiliary records to configuration change, feature set change
and user generated standalone records.

Signed-off-by: Richard Guy Briggs <***@redhat.com>
---
kernel/audit.c | 50 ++++++++++++++++++++++++++++++++++++++++----------
kernel/auditfilter.c | 5 ++++-
2 files changed, 44 insertions(+), 11 deletions(-)

diff --git a/kernel/audit.c b/kernel/audit.c
index b238be5..08662b4 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -400,8 +400,9 @@ static int audit_log_config_change(char *function_name, u32 new, u32 old,
{
struct audit_buffer *ab;
int rc = 0;
+ struct audit_context *context = audit_alloc_local();

- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return rc;
audit_log_format(ab, "%s=%u old=%u", function_name, new, old);
@@ -411,6 +412,8 @@ static int audit_log_config_change(char *function_name, u32 new, u32 old,
allow_changes = 0; /* Something weird, deny request */
audit_log_format(ab, " res=%d", allow_changes);
audit_log_end(ab);
+ audit_log_container_info(context, "config", audit_get_containerid(current));
+ audit_free_context(context);
return rc;
}

@@ -1058,7 +1061,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
return err;
}

-static void audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
+static void audit_log_common_recv_msg(struct audit_context *context,
+ struct audit_buffer **ab, u16 msg_type)
{
uid_t uid = from_kuid(&init_user_ns, current_uid());
pid_t pid = task_tgid_nr(current);
@@ -1068,7 +1072,7 @@ static void audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
return;
}

- *ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
+ *ab = audit_log_start(context, GFP_KERNEL, msg_type);
if (unlikely(!*ab))
return;
audit_log_format(*ab, "pid=%d uid=%u", pid, uid);
@@ -1097,11 +1101,12 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature
u32 old_lock, u32 new_lock, int res)
{
struct audit_buffer *ab;
+ struct audit_context *context = audit_alloc_local();

if (audit_enabled == AUDIT_OFF)
return;

- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
if (!ab)
return;
audit_log_task_info(ab, current);
@@ -1109,6 +1114,8 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature
audit_feature_names[which], !!old_feature, !!new_feature,
!!old_lock, !!new_lock, res);
audit_log_end(ab);
+ audit_log_container_info(context, "feature", audit_get_containerid(current));
+ audit_free_context(context);
}

static int audit_set_feature(struct sk_buff *skb)
@@ -1337,13 +1344,15 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)

err = audit_filter(msg_type, AUDIT_FILTER_USER);
if (err == 1) { /* match or error */
+ struct audit_context *context = audit_alloc_local();
+
err = 0;
if (msg_type == AUDIT_USER_TTY) {
err = tty_audit_push();
if (err)
break;
}
- audit_log_common_recv_msg(&ab, msg_type);
+ audit_log_common_recv_msg(context, &ab, msg_type);
if (msg_type != AUDIT_USER_TTY)
audit_log_format(ab, " msg='%.*s'",
AUDIT_MESSAGE_TEXT_MAX,
@@ -1359,6 +1368,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
audit_log_n_untrustedstring(ab, data, size);
}
audit_log_end(ab);
+ audit_log_container_info(context, "user",
+ audit_get_containerid(current));
+ audit_free_context(context);
}
break;
case AUDIT_ADD_RULE:
@@ -1366,9 +1378,14 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (nlmsg_len(nlh) < sizeof(struct audit_rule_data))
return -EINVAL;
if (audit_enabled == AUDIT_LOCKED) {
- audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
+ struct audit_context *context = audit_alloc_local();
+
+ audit_log_common_recv_msg(context, &ab, AUDIT_CONFIG_CHANGE);
audit_log_format(ab, " audit_enabled=%d res=0", audit_enabled);
audit_log_end(ab);
+ audit_log_container_info(context, "config",
+ audit_get_containerid(current));
+ audit_free_context(context);
return -EPERM;
}
err = audit_rule_change(msg_type, seq, data, nlmsg_len(nlh));
@@ -1376,17 +1393,23 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
case AUDIT_LIST_RULES:
err = audit_list_rules_send(skb, seq);
break;
- case AUDIT_TRIM:
+ case AUDIT_TRIM: {
+ struct audit_context *context = audit_alloc_local();
audit_trim_trees();
- audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
+ audit_log_common_recv_msg(context, &ab, AUDIT_CONFIG_CHANGE);
audit_log_format(ab, " op=trim res=1");
audit_log_end(ab);
+ audit_log_container_info(context, "config",
+ audit_get_containerid(current));
+ audit_free_context(context);
break;
+ }
case AUDIT_MAKE_EQUIV: {
void *bufp = data;
u32 sizes[2];
size_t msglen = nlmsg_len(nlh);
char *old, *new;
+ struct audit_context *context = audit_alloc_local();

err = -EINVAL;
if (msglen < 2 * sizeof(u32))
@@ -1408,7 +1431,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
/* OK, here comes... */
err = audit_tag_tree(old, new);

- audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
+ audit_log_common_recv_msg(context, &ab, AUDIT_CONFIG_CHANGE);

audit_log_format(ab, " op=make_equiv old=");
audit_log_untrustedstring(ab, old);
@@ -1418,6 +1441,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
audit_log_end(ab);
kfree(old);
kfree(new);
+ audit_log_container_info(context, "config",
+ audit_get_containerid(current));
+ audit_free_context(context);
break;
}
case AUDIT_SIGNAL_INFO:
@@ -1459,6 +1485,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
struct audit_tty_status s, old;
struct audit_buffer *ab;
unsigned int t;
+ struct audit_context *context = audit_alloc_local();

memset(&s, 0, sizeof(s));
/* guard against past and future API changes */
@@ -1477,12 +1504,15 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
old.enabled = t & AUDIT_TTY_ENABLE;
old.log_passwd = !!(t & AUDIT_TTY_LOG_PASSWD);

- audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
+ audit_log_common_recv_msg(context, &ab, AUDIT_CONFIG_CHANGE);
audit_log_format(ab, " op=tty_set old-enabled=%d new-enabled=%d"
" old-log_passwd=%d new-log_passwd=%d res=%d",
old.enabled, s.enabled, old.log_passwd,
s.log_passwd, !err);
audit_log_end(ab);
+ audit_log_container_info(context, "config",
+ audit_get_containerid(current));
+ audit_free_context(context);
break;
}
default:
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index c4c8746..5f7f4d6 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1109,11 +1109,12 @@ static void audit_log_rule_change(char *action, struct audit_krule *rule, int re
struct audit_buffer *ab;
uid_t loginuid = from_kuid(&init_user_ns, audit_get_loginuid(current));
unsigned int sessionid = audit_get_sessionid(current);
+ struct audit_context *context = audit_alloc_local();

if (!audit_enabled)
return;

- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
if (!ab)
return;
audit_log_format(ab, "auid=%u ses=%u" ,loginuid, sessionid);
@@ -1122,6 +1123,8 @@ static void audit_log_rule_change(char *action, struct audit_krule *rule, int re
audit_log_key(ab, rule->filterkey);
audit_log_format(ab, " list=%d res=%d", rule->listnr, res);
audit_log_end(ab);
+ audit_log_container_info(context, "config", audit_get_containerid(current));
+ audit_free_context(context);
}

/**
--
1.8.3.1
Paul Moore
2018-04-19 01:27:34 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Add container ID auxiliary records to configuration change, feature set change
and user generated standalone records.
---
kernel/audit.c | 50 ++++++++++++++++++++++++++++++++++++++++----------
kernel/auditfilter.c | 5 ++++-
2 files changed, 44 insertions(+), 11 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index b238be5..08662b4 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -400,8 +400,9 @@ static int audit_log_config_change(char *function_name, u32 new, u32 old,
{
struct audit_buffer *ab;
int rc = 0;
+ struct audit_context *context = audit_alloc_local();
We should be able to use current->audit_context here right? If we
can't for every caller, perhaps we pass an audit_context as an
argument and only allocate a local context when the passed
audit_context is NULL.

Also, if you're not comfortable always using current, just pass the
audit_context as you do with audit_log_common_recv_msg().
Post by Richard Guy Briggs
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return rc;
audit_log_format(ab, "%s=%u old=%u", function_name, new, old);
@@ -411,6 +412,8 @@ static int audit_log_config_change(char *function_name, u32 new, u32 old,
allow_changes = 0; /* Something weird, deny request */
audit_log_format(ab, " res=%d", allow_changes);
audit_log_end(ab);
+ audit_log_container_info(context, "config", audit_get_containerid(current));
+ audit_free_context(context);
return rc;
}
@@ -1058,7 +1061,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
return err;
}
-static void audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
+static void audit_log_common_recv_msg(struct audit_context *context,
+ struct audit_buffer **ab, u16 msg_type)
{
uid_t uid = from_kuid(&init_user_ns, current_uid());
pid_t pid = task_tgid_nr(current);
@@ -1068,7 +1072,7 @@ static void audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
return;
}
- *ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
+ *ab = audit_log_start(context, GFP_KERNEL, msg_type);
if (unlikely(!*ab))
return;
audit_log_format(*ab, "pid=%d uid=%u", pid, uid);
@@ -1097,11 +1101,12 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature
u32 old_lock, u32 new_lock, int res)
{
struct audit_buffer *ab;
+ struct audit_context *context = audit_alloc_local();
So I know based on the other patch we are currently discussing that we
can use current here ...
Post by Richard Guy Briggs
if (audit_enabled == AUDIT_OFF)
return;
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
if (!ab)
return;
audit_log_task_info(ab, current);
@@ -1109,6 +1114,8 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature
audit_feature_names[which], !!old_feature, !!new_feature,
!!old_lock, !!new_lock, res);
audit_log_end(ab);
+ audit_log_container_info(context, "feature", audit_get_containerid(current));
+ audit_free_context(context);
}
static int audit_set_feature(struct sk_buff *skb)
@@ -1337,13 +1344,15 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
err = audit_filter(msg_type, AUDIT_FILTER_USER);
if (err == 1) { /* match or error */
+ struct audit_context *context = audit_alloc_local();
I'm pretty sure we can use current here.
Post by Richard Guy Briggs
err = 0;
if (msg_type == AUDIT_USER_TTY) {
err = tty_audit_push();
if (err)
break;
}
- audit_log_common_recv_msg(&ab, msg_type);
+ audit_log_common_recv_msg(context, &ab, msg_type);
if (msg_type != AUDIT_USER_TTY)
audit_log_format(ab, " msg='%.*s'",
AUDIT_MESSAGE_TEXT_MAX,
@@ -1359,6 +1368,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
audit_log_n_untrustedstring(ab, data, size);
}
audit_log_end(ab);
+ audit_log_container_info(context, "user",
+ audit_get_containerid(current));
+ audit_free_context(context);
}
break;
@@ -1366,9 +1378,14 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (nlmsg_len(nlh) < sizeof(struct audit_rule_data))
return -EINVAL;
if (audit_enabled == AUDIT_LOCKED) {
- audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
+ struct audit_context *context = audit_alloc_local();
Pretty sure current can be used here too. In fact I think everywhere
where we are processing commands from netlink we can use current as I
believe the entire netlink stack is processed in the context of the
caller.
Post by Richard Guy Briggs
+ audit_log_common_recv_msg(context, &ab, AUDIT_CONFIG_CHANGE);
audit_log_format(ab, " audit_enabled=%d res=0", audit_enabled);
audit_log_end(ab);
+ audit_log_container_info(context, "config",
+ audit_get_containerid(current));
+ audit_free_context(context);
return -EPERM;
}
err = audit_rule_change(msg_type, seq, data, nlmsg_len(nlh));
@@ -1376,17 +1393,23 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
err = audit_list_rules_send(skb, seq);
break;
+ case AUDIT_TRIM: {
+ struct audit_context *context = audit_alloc_local();
Same.
Post by Richard Guy Briggs
audit_trim_trees();
- audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
+ audit_log_common_recv_msg(context, &ab, AUDIT_CONFIG_CHANGE);
audit_log_format(ab, " op=trim res=1");
audit_log_end(ab);
+ audit_log_container_info(context, "config",
+ audit_get_containerid(current));
+ audit_free_context(context);
break;
+ }
case AUDIT_MAKE_EQUIV: {
void *bufp = data;
u32 sizes[2];
size_t msglen = nlmsg_len(nlh);
char *old, *new;
+ struct audit_context *context = audit_alloc_local();
Same.
Post by Richard Guy Briggs
err = -EINVAL;
if (msglen < 2 * sizeof(u32))
@@ -1408,7 +1431,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
/* OK, here comes... */
err = audit_tag_tree(old, new);
- audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
+ audit_log_common_recv_msg(context, &ab, AUDIT_CONFIG_CHANGE);
audit_log_format(ab, " op=make_equiv old=");
audit_log_untrustedstring(ab, old);
@@ -1418,6 +1441,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
audit_log_end(ab);
kfree(old);
kfree(new);
+ audit_log_container_info(context, "config",
+ audit_get_containerid(current));
+ audit_free_context(context);
break;
}
@@ -1459,6 +1485,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
struct audit_tty_status s, old;
struct audit_buffer *ab;
unsigned int t;
+ struct audit_context *context = audit_alloc_local();
Same.
Post by Richard Guy Briggs
memset(&s, 0, sizeof(s));
/* guard against past and future API changes */
@@ -1477,12 +1504,15 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
old.enabled = t & AUDIT_TTY_ENABLE;
old.log_passwd = !!(t & AUDIT_TTY_LOG_PASSWD);
- audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
+ audit_log_common_recv_msg(context, &ab, AUDIT_CONFIG_CHANGE);
audit_log_format(ab, " op=tty_set old-enabled=%d new-enabled=%d"
" old-log_passwd=%d new-log_passwd=%d res=%d",
old.enabled, s.enabled, old.log_passwd,
s.log_passwd, !err);
audit_log_end(ab);
+ audit_log_container_info(context, "config",
+ audit_get_containerid(current));
+ audit_free_context(context);
break;
}
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index c4c8746..5f7f4d6 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1109,11 +1109,12 @@ static void audit_log_rule_change(char *action, struct audit_krule *rule, int re
struct audit_buffer *ab;
uid_t loginuid = from_kuid(&init_user_ns, audit_get_loginuid(current));
unsigned int sessionid = audit_get_sessionid(current);
+ struct audit_context *context = audit_alloc_local();
if (!audit_enabled)
return;
Well, first I think we should be able to get rid of the local context,
but if for some reason we can't use current->audit_context then do the
allocation after the audit_enabled check.
Post by Richard Guy Briggs
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
if (!ab)
return;
audit_log_format(ab, "auid=%u ses=%u" ,loginuid, sessionid);
@@ -1122,6 +1123,8 @@ static void audit_log_rule_change(char *action, struct audit_krule *rule, int re
audit_log_key(ab, rule->filterkey);
audit_log_format(ab, " list=%d res=%d", rule->listnr, res);
audit_log_end(ab);
+ audit_log_container_info(context, "config", audit_get_containerid(current));
+ audit_free_context(context);
}
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-04-19 12:31:09 UTC
Permalink
Raw Message
Post by Paul Moore
Post by Richard Guy Briggs
Add container ID auxiliary records to configuration change, feature set change
and user generated standalone records.
---
kernel/audit.c | 50 ++++++++++++++++++++++++++++++++++++++++----------
kernel/auditfilter.c | 5 ++++-
2 files changed, 44 insertions(+), 11 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index b238be5..08662b4 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -400,8 +400,9 @@ static int audit_log_config_change(char *function_name, u32 new, u32 old,
{
struct audit_buffer *ab;
int rc = 0;
+ struct audit_context *context = audit_alloc_local();
We should be able to use current->audit_context here right? If we
can't for every caller, perhaps we pass an audit_context as an
argument and only allocate a local context when the passed
audit_context is NULL.
Also, if you're not comfortable always using current, just pass the
audit_context as you do with audit_log_common_recv_msg().
As mentioned in the tree/watch/mark patch, this is all obsoleted by
making the AUDIT_CONFIG_CHANGE record a SYSCALL auxiliary record.
This review would have been more helpful a month and a half ago.
Post by Paul Moore
Post by Richard Guy Briggs
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return rc;
audit_log_format(ab, "%s=%u old=%u", function_name, new, old);
@@ -411,6 +412,8 @@ static int audit_log_config_change(char *function_name, u32 new, u32 old,
allow_changes = 0; /* Something weird, deny request */
audit_log_format(ab, " res=%d", allow_changes);
audit_log_end(ab);
+ audit_log_container_info(context, "config", audit_get_containerid(current));
+ audit_free_context(context);
return rc;
}
@@ -1058,7 +1061,8 @@ static int audit_netlink_ok(struct sk_buff *skb, u16 msg_type)
return err;
}
-static void audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
+static void audit_log_common_recv_msg(struct audit_context *context,
+ struct audit_buffer **ab, u16 msg_type)
{
uid_t uid = from_kuid(&init_user_ns, current_uid());
pid_t pid = task_tgid_nr(current);
@@ -1068,7 +1072,7 @@ static void audit_log_common_recv_msg(struct audit_buffer **ab, u16 msg_type)
return;
}
- *ab = audit_log_start(NULL, GFP_KERNEL, msg_type);
+ *ab = audit_log_start(context, GFP_KERNEL, msg_type);
if (unlikely(!*ab))
return;
audit_log_format(*ab, "pid=%d uid=%u", pid, uid);
@@ -1097,11 +1101,12 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature
u32 old_lock, u32 new_lock, int res)
{
struct audit_buffer *ab;
+ struct audit_context *context = audit_alloc_local();
So I know based on the other patch we are currently discussing that we
can use current here ...
Post by Richard Guy Briggs
if (audit_enabled == AUDIT_OFF)
return;
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_FEATURE_CHANGE);
if (!ab)
return;
audit_log_task_info(ab, current);
@@ -1109,6 +1114,8 @@ static void audit_log_feature_change(int which, u32 old_feature, u32 new_feature
audit_feature_names[which], !!old_feature, !!new_feature,
!!old_lock, !!new_lock, res);
audit_log_end(ab);
+ audit_log_container_info(context, "feature", audit_get_containerid(current));
+ audit_free_context(context);
}
static int audit_set_feature(struct sk_buff *skb)
@@ -1337,13 +1344,15 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
err = audit_filter(msg_type, AUDIT_FILTER_USER);
if (err == 1) { /* match or error */
+ struct audit_context *context = audit_alloc_local();
I'm pretty sure we can use current here.
Post by Richard Guy Briggs
err = 0;
if (msg_type == AUDIT_USER_TTY) {
err = tty_audit_push();
if (err)
break;
}
- audit_log_common_recv_msg(&ab, msg_type);
+ audit_log_common_recv_msg(context, &ab, msg_type);
if (msg_type != AUDIT_USER_TTY)
audit_log_format(ab, " msg='%.*s'",
AUDIT_MESSAGE_TEXT_MAX,
@@ -1359,6 +1368,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
audit_log_n_untrustedstring(ab, data, size);
}
audit_log_end(ab);
+ audit_log_container_info(context, "user",
+ audit_get_containerid(current));
+ audit_free_context(context);
}
break;
@@ -1366,9 +1378,14 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
if (nlmsg_len(nlh) < sizeof(struct audit_rule_data))
return -EINVAL;
if (audit_enabled == AUDIT_LOCKED) {
- audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
+ struct audit_context *context = audit_alloc_local();
Pretty sure current can be used here too. In fact I think everywhere
where we are processing commands from netlink we can use current as I
believe the entire netlink stack is processed in the context of the
caller.
Post by Richard Guy Briggs
+ audit_log_common_recv_msg(context, &ab, AUDIT_CONFIG_CHANGE);
audit_log_format(ab, " audit_enabled=%d res=0", audit_enabled);
audit_log_end(ab);
+ audit_log_container_info(context, "config",
+ audit_get_containerid(current));
+ audit_free_context(context);
return -EPERM;
}
err = audit_rule_change(msg_type, seq, data, nlmsg_len(nlh));
@@ -1376,17 +1393,23 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
err = audit_list_rules_send(skb, seq);
break;
+ case AUDIT_TRIM: {
+ struct audit_context *context = audit_alloc_local();
Same.
Post by Richard Guy Briggs
audit_trim_trees();
- audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
+ audit_log_common_recv_msg(context, &ab, AUDIT_CONFIG_CHANGE);
audit_log_format(ab, " op=trim res=1");
audit_log_end(ab);
+ audit_log_container_info(context, "config",
+ audit_get_containerid(current));
+ audit_free_context(context);
break;
+ }
case AUDIT_MAKE_EQUIV: {
void *bufp = data;
u32 sizes[2];
size_t msglen = nlmsg_len(nlh);
char *old, *new;
+ struct audit_context *context = audit_alloc_local();
Same.
Post by Richard Guy Briggs
err = -EINVAL;
if (msglen < 2 * sizeof(u32))
@@ -1408,7 +1431,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
/* OK, here comes... */
err = audit_tag_tree(old, new);
- audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
+ audit_log_common_recv_msg(context, &ab, AUDIT_CONFIG_CHANGE);
audit_log_format(ab, " op=make_equiv old=");
audit_log_untrustedstring(ab, old);
@@ -1418,6 +1441,9 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
audit_log_end(ab);
kfree(old);
kfree(new);
+ audit_log_container_info(context, "config",
+ audit_get_containerid(current));
+ audit_free_context(context);
break;
}
@@ -1459,6 +1485,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
struct audit_tty_status s, old;
struct audit_buffer *ab;
unsigned int t;
+ struct audit_context *context = audit_alloc_local();
Same.
Post by Richard Guy Briggs
memset(&s, 0, sizeof(s));
/* guard against past and future API changes */
@@ -1477,12 +1504,15 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
old.enabled = t & AUDIT_TTY_ENABLE;
old.log_passwd = !!(t & AUDIT_TTY_LOG_PASSWD);
- audit_log_common_recv_msg(&ab, AUDIT_CONFIG_CHANGE);
+ audit_log_common_recv_msg(context, &ab, AUDIT_CONFIG_CHANGE);
audit_log_format(ab, " op=tty_set old-enabled=%d new-enabled=%d"
" old-log_passwd=%d new-log_passwd=%d res=%d",
old.enabled, s.enabled, old.log_passwd,
s.log_passwd, !err);
audit_log_end(ab);
+ audit_log_container_info(context, "config",
+ audit_get_containerid(current));
+ audit_free_context(context);
break;
}
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index c4c8746..5f7f4d6 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1109,11 +1109,12 @@ static void audit_log_rule_change(char *action, struct audit_krule *rule, int re
struct audit_buffer *ab;
uid_t loginuid = from_kuid(&init_user_ns, audit_get_loginuid(current));
unsigned int sessionid = audit_get_sessionid(current);
+ struct audit_context *context = audit_alloc_local();
if (!audit_enabled)
return;
Well, first I think we should be able to get rid of the local context,
but if for some reason we can't use current->audit_context then do the
allocation after the audit_enabled check.
Post by Richard Guy Briggs
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
if (!ab)
return;
audit_log_format(ab, "auid=%u ses=%u" ,loginuid, sessionid);
@@ -1122,6 +1123,8 @@ static void audit_log_rule_change(char *action, struct audit_krule *rule, int re
audit_log_key(ab, rule->filterkey);
audit_log_format(ab, " list=%d res=%d", rule->listnr, res);
audit_log_end(ab);
+ audit_log_container_info(context, "config", audit_get_containerid(current));
+ audit_free_context(context);
}
--
paul moore
www.paul-moore.com
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Paul Moore
2018-04-19 12:59:29 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Add container ID auxiliary records to configuration change, feature set change
and user generated standalone records.
---
kernel/audit.c | 50 ++++++++++++++++++++++++++++++++++++++++----------
kernel/auditfilter.c | 5 ++++-
2 files changed, 44 insertions(+), 11 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index b238be5..08662b4 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -400,8 +400,9 @@ static int audit_log_config_change(char *function_name, u32 new, u32 old,
{
struct audit_buffer *ab;
int rc = 0;
+ struct audit_context *context = audit_alloc_local();
We should be able to use current->audit_context here right? If we
can't for every caller, perhaps we pass an audit_context as an
argument and only allocate a local context when the passed
audit_context is NULL.
Also, if you're not comfortable always using current, just pass the
audit_context as you do with audit_log_common_recv_msg().
As mentioned in the tree/watch/mark patch, this is all obsoleted by
making the AUDIT_CONFIG_CHANGE record a SYSCALL auxiliary record.
You've known about my desire to connect records for quite some time.
Post by Richard Guy Briggs
This review would have been more helpful a month and a half ago.
If you really want to sink to that level of discussion, better quality
patches from you would have been helpful too, that is the one of the
main reasons why it takes so long to review your code. Let's keep the
commentary focused on the code, discussions like this aren't likely to
be helpful to anyone.
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-03-16 09:00:40 UTC
Permalink
Raw Message
Add support for reading the container ID from the proc filesystem.

This is a read from the proc entry of the form /proc/PID/containerid
where PID is the process ID of the task whose container ID is sought.

The read expects up to a u64 value (unset: 18446744073709551615).

Signed-off-by: Richard Guy Briggs <***@redhat.com>
---
fs/proc/base.c | 20 ++++++++++++++++++--
1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6ce4fbe..f66d1e2 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1300,6 +1300,21 @@ static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
.llseek = generic_file_llseek,
};

+static ssize_t proc_containerid_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode *inode = file_inode(file);
+ struct task_struct *task = get_proc_task(inode);
+ ssize_t length;
+ char tmpbuf[TMPBUFLEN*2];
+
+ if (!task)
+ return -ESRCH;
+ length = scnprintf(tmpbuf, TMPBUFLEN*2, "%llu", audit_get_containerid(task));
+ put_task_struct(task);
+ return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
+}
+
static ssize_t proc_containerid_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
@@ -1330,6 +1345,7 @@ static ssize_t proc_containerid_write(struct file *file, const char __user *buf,
}

static const struct file_operations proc_containerid_operations = {
+ .read = proc_containerid_read,
.write = proc_containerid_write,
.llseek = generic_file_llseek,
};
@@ -2996,7 +3012,7 @@ static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns,
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
- REG("containerid", S_IWUSR, proc_containerid_operations),
+ REG("containerid", S_IWUSR|S_IRUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
@@ -3391,7 +3407,7 @@ static int proc_tid_comm_permission(struct inode *inode, int mask)
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
- REG("containerid", S_IWUSR, proc_containerid_operations),
+ REG("containerid", S_IWUSR|S_IRUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
--
1.8.3.1
Steve Grubb
2018-05-21 19:16:01 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Add support for reading the container ID from the proc filesystem.
I think this could be useful in general. Please consider this to be part of
the full patch set and not something merely used to debug the patches.

-Steve
Post by Richard Guy Briggs
This is a read from the proc entry of the form /proc/PID/containerid
where PID is the process ID of the task whose container ID is sought.
The read expects up to a u64 value (unset: 18446744073709551615).
---
fs/proc/base.c | 20 ++++++++++++++++++--
1 file changed, 18 insertions(+), 2 deletions(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 6ce4fbe..f66d1e2 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1300,6 +1300,21 @@ static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
.llseek = generic_file_llseek,
};
+static ssize_t proc_containerid_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode *inode = file_inode(file);
+ struct task_struct *task = get_proc_task(inode);
+ ssize_t length;
+ char tmpbuf[TMPBUFLEN*2];
+
+ if (!task)
+ return -ESRCH;
+ length = scnprintf(tmpbuf, TMPBUFLEN*2, "%llu", audit_get_containerid(task));
+ put_task_struct(task);
+ return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
+}
+
static ssize_t proc_containerid_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
@@ -1330,6 +1345,7 @@ static ssize_t proc_containerid_write(struct file *file, const char __user *buf,
}
static const struct file_operations proc_containerid_operations = {
+ .read = proc_containerid_read,
.write = proc_containerid_write,
.llseek = generic_file_llseek,
};
@@ -2996,7 +3012,7 @@ static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns,
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
- REG("containerid", S_IWUSR, proc_containerid_operations),
+ REG("containerid", S_IWUSR|S_IRUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
@@ -3391,7 +3407,7 @@ static int proc_tid_comm_permission(struct inode *inode, int mask)
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
- REG("containerid", S_IWUSR, proc_containerid_operations),
+ REG("containerid", S_IWUSR|S_IRUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
Eric W. Biederman
2018-05-21 19:19:24 UTC
Permalink
Raw Message
Post by Steve Grubb
Post by Richard Guy Briggs
Add support for reading the container ID from the proc filesystem.
I think this could be useful in general. Please consider this to be part of
the full patch set and not something merely used to debug the patches.
Only with an audit specific name.

As it is:

Nacked-by: "Eric W. Biederman" <***@xmission.com>

The truth is the containerid name really stinks and is quite confusing
and does not imply that the label applies only to audit. And little
things like this make me extremely uncomfortable with it.

Eric
Paul Moore
2018-05-21 20:06:31 UTC
Permalink
Raw Message
On Mon, May 21, 2018 at 3:19 PM, Eric W. Biederman
Post by Eric W. Biederman
Post by Steve Grubb
Post by Richard Guy Briggs
Add support for reading the container ID from the proc filesystem.
I think this could be useful in general. Please consider this to be part of
the full patch set and not something merely used to debug the patches.
Only with an audit specific name.
The truth is the containerid name really stinks and is quite confusing
and does not imply that the label applies only to audit. And little
things like this make me extremely uncomfortable with it.
It also makes the audit container ID (notice how I *always* call it
the *audit* container ID? that is not an accident) available for
userspace applications to abuse. Perhaps in the future we can look at
ways to make this more available to applications, but this patch is
not the answer.
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-05-22 17:35:41 UTC
Permalink
Raw Message
Post by Paul Moore
Post by Eric W. Biederman
Post by Steve Grubb
Post by Richard Guy Briggs
Add support for reading the container ID from the proc filesystem.
I think this could be useful in general. Please consider this to be part of
the full patch set and not something merely used to debug the patches.
Only with an audit specific name.
The truth is the containerid name really stinks and is quite confusing
and does not imply that the label applies only to audit. And little
things like this make me extremely uncomfortable with it.
It also makes the audit container ID (notice how I *always* call it
the *audit* container ID? that is not an accident) available for
userspace applications to abuse. Perhaps in the future we can look at
ways to make this more available to applications, but this patch is
not the answer.
Do you have a productive suggestion?
Post by Paul Moore
paul moore
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Paul Moore
2018-05-22 18:59:38 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Post by Paul Moore
Post by Eric W. Biederman
Post by Steve Grubb
Post by Richard Guy Briggs
Add support for reading the container ID from the proc filesystem.
I think this could be useful in general. Please consider this to be part of
the full patch set and not something merely used to debug the patches.
Only with an audit specific name.
The truth is the containerid name really stinks and is quite confusing
and does not imply that the label applies only to audit. And little
things like this make me extremely uncomfortable with it.
It also makes the audit container ID (notice how I *always* call it
the *audit* container ID? that is not an accident) available for
userspace applications to abuse. Perhaps in the future we can look at
ways to make this more available to applications, but this patch is
not the answer.
Do you have a productive suggestion?
I haven't given it much thought beyond our discussions and until we
get the basic audit container ID support in place (all the other parts
of this patchset) I doubt I'll be giving it much thought.
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-03-16 09:00:32 UTC
Permalink
Raw Message
Add container ID support to ptrace and signals. In particular, the "op"
field provides a way to label the auxiliary record to which it is
associated.

Signed-off-by: Richard Guy Briggs <***@redhat.com>
---
include/linux/audit.h | 16 +++++++++++-----
kernel/audit.c | 12 ++++++++----
kernel/audit.h | 2 ++
kernel/auditsc.c | 19 +++++++++++++++----
4 files changed, 36 insertions(+), 13 deletions(-)

diff --git a/include/linux/audit.h b/include/linux/audit.h
index f10ca1b..ed16bb6 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -35,6 +35,7 @@ struct audit_sig_info {
uid_t uid;
pid_t pid;
char ctx[0];
+ u64 cid;
};

struct audit_buffer;
@@ -155,8 +156,8 @@ extern void audit_log_link_denied(const char *operation,
extern int audit_log_task_context(struct audit_buffer *ab);
extern void audit_log_task_info(struct audit_buffer *ab,
struct task_struct *tsk);
-extern int audit_log_container_info(struct task_struct *tsk,
- struct audit_context *context);
+extern int audit_log_container_info(struct audit_context *context,
+ char *op, u64 containerid);

extern int audit_update_lsm_rules(void);

@@ -208,8 +209,8 @@ static inline int audit_log_task_context(struct audit_buffer *ab)
static inline void audit_log_task_info(struct audit_buffer *ab,
struct task_struct *tsk)
{ }
-static inline int audit_log_container_info(struct task_struct *tsk,
- struct audit_context *context);
+static inline int audit_log_container_info(struct audit_context *context,
+ char *op, u64 containerid);
{ }
#define audit_enabled 0
#endif /* CONFIG_AUDIT */
@@ -598,9 +599,14 @@ static inline bool audit_loginuid_set(struct task_struct *tsk)
return uid_valid(audit_get_loginuid(tsk));
}

+static inline bool cid_valid(u64 containerid)
+{
+ return containerid != INVALID_CID;
+}
+
static inline bool audit_containerid_set(struct task_struct *tsk)
{
- return audit_get_containerid(tsk) != INVALID_CID;
+ return cid_valid(audit_get_containerid(tsk));
}

static inline void audit_log_string(struct audit_buffer *ab, const char *buf)
diff --git a/kernel/audit.c b/kernel/audit.c
index a12f21f..b238be5 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -142,6 +142,7 @@ struct audit_net {
kuid_t audit_sig_uid = INVALID_UID;
pid_t audit_sig_pid = -1;
u32 audit_sig_sid = 0;
+u64 audit_sig_cid = INVALID_CID;

/* Records can be lost in several ways:
0) [suppressed in audit_alloc]
@@ -1438,6 +1439,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
memcpy(sig_data->ctx, ctx, len);
security_release_secctx(ctx, len);
}
+ sig_data->cid = audit_sig_cid;
audit_send_reply(skb, seq, AUDIT_SIGNAL_INFO, 0, 0,
sig_data, sizeof(*sig_data) + len);
kfree(sig_data);
@@ -2051,20 +2053,22 @@ void audit_log_session_info(struct audit_buffer *ab)

/*
* audit_log_container_info - report container info
- * @tsk: task to be recorded
* @context: task or local context for record
+ * @op: containerid string description
+ * @containerid: container ID to report
*/
-int audit_log_container_info(struct task_struct *tsk, struct audit_context *context)
+int audit_log_container_info(struct audit_context *context,
+ char *op, u64 containerid)
{
struct audit_buffer *ab;

- if (!audit_containerid_set(tsk))
+ if (!cid_valid(containerid))
return 0;
/* Generate AUDIT_CONTAINER_INFO with container ID */
ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONTAINER_INFO);
if (!ab)
return -ENOMEM;
- audit_log_format(ab, "contid=%llu", audit_get_containerid(tsk));
+ audit_log_format(ab, "op=%s contid=%llu", op, containerid);
audit_log_end(ab);
return 0;
}
diff --git a/kernel/audit.h b/kernel/audit.h
index aaa651a..743d445 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -147,6 +147,7 @@ struct audit_context {
kuid_t target_uid;
unsigned int target_sessionid;
u32 target_sid;
+ u64 target_cid;
char target_comm[TASK_COMM_LEN];

struct audit_tree_refs *trees, *first_trees;
@@ -330,6 +331,7 @@ extern void audit_log_d_path_exe(struct audit_buffer *ab,
extern pid_t audit_sig_pid;
extern kuid_t audit_sig_uid;
extern u32 audit_sig_sid;
+extern u64 audit_sig_cid;

extern int audit_filter(int msgtype, unsigned int listtype);

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 2bba324..2932ef1 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -113,6 +113,7 @@ struct audit_aux_data_pids {
kuid_t target_uid[AUDIT_AUX_PIDS];
unsigned int target_sessionid[AUDIT_AUX_PIDS];
u32 target_sid[AUDIT_AUX_PIDS];
+ u64 target_cid[AUDIT_AUX_PIDS];
char target_comm[AUDIT_AUX_PIDS][TASK_COMM_LEN];
int pid_count;
};
@@ -1422,21 +1423,27 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
for (aux = context->aux_pids; aux; aux = aux->next) {
struct audit_aux_data_pids *axs = (void *)aux;

- for (i = 0; i < axs->pid_count; i++)
+ for (i = 0; i < axs->pid_count; i++) {
+ char axsn[sizeof("aux0xN ")];
+
+ sprintf(axsn, "aux0x%x", i);
if (audit_log_pid_context(context, axs->target_pid[i],
axs->target_auid[i],
axs->target_uid[i],
axs->target_sessionid[i],
axs->target_sid[i],
- axs->target_comm[i]))
+ axs->target_comm[i])
+ && audit_log_container_info(context, axsn, axs->target_cid[i]))
call_panic = 1;
+ }
}

if (context->target_pid &&
audit_log_pid_context(context, context->target_pid,
context->target_auid, context->target_uid,
context->target_sessionid,
- context->target_sid, context->target_comm))
+ context->target_sid, context->target_comm)
+ && audit_log_container_info(context, "target", context->target_cid))
call_panic = 1;

if (context->pwd.dentry && context->pwd.mnt) {
@@ -1456,7 +1463,7 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts

audit_log_proctitle(tsk, context);

- audit_log_container_info(tsk, context);
+ audit_log_container_info(context, "task", audit_get_containerid(tsk));

/* Send end of event record to help user space know we are finished */
ab = audit_log_start(context, GFP_KERNEL, AUDIT_EOE);
@@ -2356,6 +2363,7 @@ void __audit_ptrace(struct task_struct *t)
context->target_uid = task_uid(t);
context->target_sessionid = audit_get_sessionid(t);
security_task_getsecid(t, &context->target_sid);
+ context->target_cid = audit_get_containerid(t);
memcpy(context->target_comm, t->comm, TASK_COMM_LEN);
}

@@ -2383,6 +2391,7 @@ int audit_signal_info(int sig, struct task_struct *t)
else
audit_sig_uid = uid;
security_task_getsecid(tsk, &audit_sig_sid);
+ audit_sig_cid = audit_get_containerid(tsk);
}

if (!audit_signals || audit_dummy_context())
@@ -2396,6 +2405,7 @@ int audit_signal_info(int sig, struct task_struct *t)
ctx->target_uid = t_uid;
ctx->target_sessionid = audit_get_sessionid(t);
security_task_getsecid(t, &ctx->target_sid);
+ ctx->target_cid = audit_get_containerid(t);
memcpy(ctx->target_comm, t->comm, TASK_COMM_LEN);
return 0;
}
@@ -2417,6 +2427,7 @@ int audit_signal_info(int sig, struct task_struct *t)
axp->target_uid[axp->pid_count] = t_uid;
axp->target_sessionid[axp->pid_count] = audit_get_sessionid(t);
security_task_getsecid(t, &axp->target_sid[axp->pid_count]);
+ axp->target_cid[axp->pid_count] = audit_get_containerid(t);
memcpy(axp->target_comm[axp->pid_count], t->comm, TASK_COMM_LEN);
axp->pid_count++;
--
1.8.3.1
Paul Moore
2018-04-19 00:32:15 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Add container ID support to ptrace and signals. In particular, the "op"
field provides a way to label the auxiliary record to which it is
associated.
---
include/linux/audit.h | 16 +++++++++++-----
kernel/audit.c | 12 ++++++++----
kernel/audit.h | 2 ++
kernel/auditsc.c | 19 +++++++++++++++----
4 files changed, 36 insertions(+), 13 deletions(-)
...
Post by Richard Guy Briggs
diff --git a/kernel/audit.c b/kernel/audit.c
index a12f21f..b238be5 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -142,6 +142,7 @@ struct audit_net {
kuid_t audit_sig_uid = INVALID_UID;
pid_t audit_sig_pid = -1;
u32 audit_sig_sid = 0;
+u64 audit_sig_cid = INVALID_CID;
0) [suppressed in audit_alloc]
@@ -1438,6 +1439,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
memcpy(sig_data->ctx, ctx, len);
security_release_secctx(ctx, len);
}
+ sig_data->cid = audit_sig_cid;
audit_send_reply(skb, seq, AUDIT_SIGNAL_INFO, 0, 0,
sig_data, sizeof(*sig_data) + len);
kfree(sig_data);
@@ -2051,20 +2053,22 @@ void audit_log_session_info(struct audit_buffer *ab)
/*
* audit_log_container_info - report container info
*/
-int audit_log_container_info(struct task_struct *tsk, struct audit_context *context)
+int audit_log_container_info(struct audit_context *context,
+ char *op, u64 containerid)
{
struct audit_buffer *ab;
- if (!audit_containerid_set(tsk))
+ if (!cid_valid(containerid))
return 0;
/* Generate AUDIT_CONTAINER_INFO with container ID */
ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONTAINER_INFO);
if (!ab)
return -ENOMEM;
- audit_log_format(ab, "contid=%llu", audit_get_containerid(tsk));
+ audit_log_format(ab, "op=%s contid=%llu", op, containerid);
audit_log_end(ab);
return 0;
}
Let's get these changes into the first patch where
audit_log_container_info() is defined. Why? This inserts a new field
into the record which is a no-no. Yes, it is one single patchset, but
they are still separate patches and who knows which patches a given
distribution and/or tree may decide to backport.
Post by Richard Guy Briggs
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 2bba324..2932ef1 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -113,6 +113,7 @@ struct audit_aux_data_pids {
kuid_t target_uid[AUDIT_AUX_PIDS];
unsigned int target_sessionid[AUDIT_AUX_PIDS];
u32 target_sid[AUDIT_AUX_PIDS];
+ u64 target_cid[AUDIT_AUX_PIDS];
char target_comm[AUDIT_AUX_PIDS][TASK_COMM_LEN];
int pid_count;
};
@@ -1422,21 +1423,27 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
for (aux = context->aux_pids; aux; aux = aux->next) {
struct audit_aux_data_pids *axs = (void *)aux;
- for (i = 0; i < axs->pid_count; i++)
+ for (i = 0; i < axs->pid_count; i++) {
+ char axsn[sizeof("aux0xN ")];
+
+ sprintf(axsn, "aux0x%x", i);
if (audit_log_pid_context(context, axs->target_pid[i],
axs->target_auid[i],
axs->target_uid[i],
axs->target_sessionid[i],
axs->target_sid[i],
- axs->target_comm[i]))
+ axs->target_comm[i])
+ && audit_log_container_info(context, axsn, axs->target_cid[i]))
Shouldn't this be an OR instead of an AND?
Post by Richard Guy Briggs
call_panic = 1;
+ }
}
if (context->target_pid &&
audit_log_pid_context(context, context->target_pid,
context->target_auid, context->target_uid,
context->target_sessionid,
- context->target_sid, context->target_comm))
+ context->target_sid, context->target_comm)
+ && audit_log_container_info(context, "target", context->target_cid))
Same question.
Post by Richard Guy Briggs
call_panic = 1;
if (context->pwd.dentry && context->pwd.mnt) {
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-04-20 01:03:20 UTC
Permalink
Raw Message
Post by Paul Moore
Post by Richard Guy Briggs
Add container ID support to ptrace and signals. In particular, the "op"
field provides a way to label the auxiliary record to which it is
associated.
---
include/linux/audit.h | 16 +++++++++++-----
kernel/audit.c | 12 ++++++++----
kernel/audit.h | 2 ++
kernel/auditsc.c | 19 +++++++++++++++----
4 files changed, 36 insertions(+), 13 deletions(-)
...
Post by Richard Guy Briggs
diff --git a/kernel/audit.c b/kernel/audit.c
index a12f21f..b238be5 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -142,6 +142,7 @@ struct audit_net {
kuid_t audit_sig_uid = INVALID_UID;
pid_t audit_sig_pid = -1;
u32 audit_sig_sid = 0;
+u64 audit_sig_cid = INVALID_CID;
0) [suppressed in audit_alloc]
@@ -1438,6 +1439,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
memcpy(sig_data->ctx, ctx, len);
security_release_secctx(ctx, len);
}
+ sig_data->cid = audit_sig_cid;
audit_send_reply(skb, seq, AUDIT_SIGNAL_INFO, 0, 0,
sig_data, sizeof(*sig_data) + len);
kfree(sig_data);
@@ -2051,20 +2053,22 @@ void audit_log_session_info(struct audit_buffer *ab)
/*
* audit_log_container_info - report container info
*/
-int audit_log_container_info(struct task_struct *tsk, struct audit_context *context)
+int audit_log_container_info(struct audit_context *context,
+ char *op, u64 containerid)
{
struct audit_buffer *ab;
- if (!audit_containerid_set(tsk))
+ if (!cid_valid(containerid))
return 0;
/* Generate AUDIT_CONTAINER_INFO with container ID */
ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONTAINER_INFO);
if (!ab)
return -ENOMEM;
- audit_log_format(ab, "contid=%llu", audit_get_containerid(tsk));
+ audit_log_format(ab, "op=%s contid=%llu", op, containerid);
audit_log_end(ab);
return 0;
}
Let's get these changes into the first patch where
audit_log_container_info() is defined. Why? This inserts a new field
into the record which is a no-no. Yes, it is one single patchset, but
they are still separate patches and who knows which patches a given
distribution and/or tree may decide to backport.
Fair enough. That first thought went through my mind... Would it be
sufficient to move that field addition to the first patch and leave the
rest here to support trace and signals?
Post by Paul Moore
Post by Richard Guy Briggs
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 2bba324..2932ef1 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -113,6 +113,7 @@ struct audit_aux_data_pids {
kuid_t target_uid[AUDIT_AUX_PIDS];
unsigned int target_sessionid[AUDIT_AUX_PIDS];
u32 target_sid[AUDIT_AUX_PIDS];
+ u64 target_cid[AUDIT_AUX_PIDS];
char target_comm[AUDIT_AUX_PIDS][TASK_COMM_LEN];
int pid_count;
};
@@ -1422,21 +1423,27 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
for (aux = context->aux_pids; aux; aux = aux->next) {
struct audit_aux_data_pids *axs = (void *)aux;
- for (i = 0; i < axs->pid_count; i++)
+ for (i = 0; i < axs->pid_count; i++) {
+ char axsn[sizeof("aux0xN ")];
+
+ sprintf(axsn, "aux0x%x", i);
if (audit_log_pid_context(context, axs->target_pid[i],
axs->target_auid[i],
axs->target_uid[i],
axs->target_sessionid[i],
axs->target_sid[i],
- axs->target_comm[i]))
+ axs->target_comm[i])
+ && audit_log_container_info(context, axsn, axs->target_cid[i]))
Shouldn't this be an OR instead of an AND?
Yes. Bash-brain...
Post by Paul Moore
Post by Richard Guy Briggs
call_panic = 1;
+ }
}
if (context->target_pid &&
audit_log_pid_context(context, context->target_pid,
context->target_auid, context->target_uid,
context->target_sessionid,
- context->target_sid, context->target_comm))
+ context->target_sid, context->target_comm)
+ && audit_log_container_info(context, "target", context->target_cid))
Same question.
Yes.
Post by Paul Moore
Post by Richard Guy Briggs
call_panic = 1;
if (context->pwd.dentry && context->pwd.mnt) {
--
paul moore
www.paul-moore.com
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Paul Moore
2018-04-20 16:13:37 UTC
Permalink
Raw Message
...
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
/*
* audit_log_container_info - report container info
*/
-int audit_log_container_info(struct task_struct *tsk, struct audit_context *context)
+int audit_log_container_info(struct audit_context *context,
+ char *op, u64 containerid)
{
struct audit_buffer *ab;
- if (!audit_containerid_set(tsk))
+ if (!cid_valid(containerid))
return 0;
/* Generate AUDIT_CONTAINER_INFO with container ID */
ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONTAINER_INFO);
if (!ab)
return -ENOMEM;
- audit_log_format(ab, "contid=%llu", audit_get_containerid(tsk));
+ audit_log_format(ab, "op=%s contid=%llu", op, containerid);
audit_log_end(ab);
return 0;
}
Let's get these changes into the first patch where
audit_log_container_info() is defined. Why? This inserts a new field
into the record which is a no-no. Yes, it is one single patchset, but
they are still separate patches and who knows which patches a given
distribution and/or tree may decide to backport.
Fair enough. That first thought went through my mind... Would it be
sufficient to move that field addition to the first patch and leave the
rest here to support trace and signals?
I should have been more clear ... yes, that's what I was thinking; the
record format is the important part as it's user visible.
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-03-16 09:00:34 UTC
Permalink
Raw Message
Add container ID auxiliary record to mark, watch and tree rule
configuration standalone records.

Signed-off-by: Richard Guy Briggs <***@redhat.com>
---
kernel/audit_fsnotify.c | 5 ++++-
kernel/audit_tree.c | 5 ++++-
kernel/audit_watch.c | 33 +++++++++++++++++++--------------
3 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index 52f368b..18c110d 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -124,10 +124,11 @@ static void audit_mark_log_rule_change(struct audit_fsnotify_mark *audit_mark, c
{
struct audit_buffer *ab;
struct audit_krule *rule = audit_mark->rule;
+ struct audit_context *context = audit_alloc_local();

if (!audit_enabled)
return;
- ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
+ ab = audit_log_start(context, GFP_NOFS, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return;
audit_log_format(ab, "auid=%u ses=%u op=%s",
@@ -138,6 +139,8 @@ static void audit_mark_log_rule_change(struct audit_fsnotify_mark *audit_mark, c
audit_log_key(ab, rule->filterkey);
audit_log_format(ab, " list=%d res=1", rule->listnr);
audit_log_end(ab);
+ audit_log_container_info(context, "config", audit_get_containerid(current));
+ audit_free_context(context);
}

void audit_remove_mark(struct audit_fsnotify_mark *audit_mark)
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 67e6956..7c085be 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -496,8 +496,9 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
static void audit_tree_log_remove_rule(struct audit_krule *rule)
{
struct audit_buffer *ab;
+ struct audit_context *context = audit_alloc_local();

- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return;
audit_log_format(ab, "op=remove_rule");
@@ -506,6 +507,8 @@ static void audit_tree_log_remove_rule(struct audit_krule *rule)
audit_log_key(ab, rule->filterkey);
audit_log_format(ab, " list=%d res=1", rule->listnr);
audit_log_end(ab);
+ audit_log_container_info(context, "config", audit_get_containerid(current));
+ audit_free_context(context);
}

static void kill_rules(struct audit_tree *tree)
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 9eb8b35..60d75a2 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -238,20 +238,25 @@ static struct audit_watch *audit_dupe_watch(struct audit_watch *old)

static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watch *w, char *op)
{
- if (audit_enabled) {
- struct audit_buffer *ab;
- ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
- if (unlikely(!ab))
- return;
- audit_log_format(ab, "auid=%u ses=%u op=%s",
- from_kuid(&init_user_ns, audit_get_loginuid(current)),
- audit_get_sessionid(current), op);
- audit_log_format(ab, " path=");
- audit_log_untrustedstring(ab, w->path);
- audit_log_key(ab, r->filterkey);
- audit_log_format(ab, " list=%d res=1", r->listnr);
- audit_log_end(ab);
- }
+ struct audit_buffer *ab;
+ struct audit_context *context = audit_alloc_local();
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(context, GFP_NOFS, AUDIT_CONFIG_CHANGE);
+ if (unlikely(!ab))
+ return;
+ audit_log_format(ab, "auid=%u ses=%u op=%s",
+ from_kuid(&init_user_ns, audit_get_loginuid(current)),
+ audit_get_sessionid(current), op);
+ audit_log_format(ab, " path=");
+ audit_log_untrustedstring(ab, w->path);
+ audit_log_key(ab, r->filterkey);
+ audit_log_format(ab, " list=%d res=1", r->listnr);
+ audit_log_end(ab);
+ audit_log_container_info(context, "config", audit_get_containerid(current));
+ audit_free_context(context);
}

/* Update inode info in audit rules based on filesystem event. */
--
1.8.3.1
Paul Moore
2018-04-19 00:42:43 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Add container ID auxiliary record to mark, watch and tree rule
configuration standalone records.
---
kernel/audit_fsnotify.c | 5 ++++-
kernel/audit_tree.c | 5 ++++-
kernel/audit_watch.c | 33 +++++++++++++++++++--------------
3 files changed, 27 insertions(+), 16 deletions(-)
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index 52f368b..18c110d 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -124,10 +124,11 @@ static void audit_mark_log_rule_change(struct audit_fsnotify_mark *audit_mark, c
{
struct audit_buffer *ab;
struct audit_krule *rule = audit_mark->rule;
+ struct audit_context *context = audit_alloc_local();
if (!audit_enabled)
return;
Move the audit_alloc_local() after the audit_enabled check.
Post by Richard Guy Briggs
- ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
+ ab = audit_log_start(context, GFP_NOFS, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return;
audit_log_format(ab, "auid=%u ses=%u op=%s",
@@ -138,6 +139,8 @@ static void audit_mark_log_rule_change(struct audit_fsnotify_mark *audit_mark, c
audit_log_key(ab, rule->filterkey);
audit_log_format(ab, " list=%d res=1", rule->listnr);
audit_log_end(ab);
+ audit_log_container_info(context, "config", audit_get_containerid(current));
+ audit_free_context(context);
}
void audit_remove_mark(struct audit_fsnotify_mark *audit_mark)
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 67e6956..7c085be 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -496,8 +496,9 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
static void audit_tree_log_remove_rule(struct audit_krule *rule)
{
struct audit_buffer *ab;
+ struct audit_context *context = audit_alloc_local();
Sort of independent of the audit container ID work, but shouldn't we
have an audit_enabled check here?
Post by Richard Guy Briggs
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return;
audit_log_format(ab, "op=remove_rule");
@@ -506,6 +507,8 @@ static void audit_tree_log_remove_rule(struct audit_krule *rule)
audit_log_key(ab, rule->filterkey);
audit_log_format(ab, " list=%d res=1", rule->listnr);
audit_log_end(ab);
+ audit_log_container_info(context, "config", audit_get_containerid(current));
+ audit_free_context(context);
}
static void kill_rules(struct audit_tree *tree)
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 9eb8b35..60d75a2 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -238,20 +238,25 @@ static struct audit_watch *audit_dupe_watch(struct audit_watch *old)
static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watch *w, char *op)
{
- if (audit_enabled) {
- struct audit_buffer *ab;
- ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
- if (unlikely(!ab))
- return;
- audit_log_format(ab, "auid=%u ses=%u op=%s",
- from_kuid(&init_user_ns, audit_get_loginuid(current)),
- audit_get_sessionid(current), op);
- audit_log_format(ab, " path=");
- audit_log_untrustedstring(ab, w->path);
- audit_log_key(ab, r->filterkey);
- audit_log_format(ab, " list=%d res=1", r->listnr);
- audit_log_end(ab);
- }
+ struct audit_buffer *ab;
+ struct audit_context *context = audit_alloc_local();
+
+ if (!audit_enabled)
+ return;
Same as above, do the allocation after the audit_enabled check.
Post by Richard Guy Briggs
+ ab = audit_log_start(context, GFP_NOFS, AUDIT_CONFIG_CHANGE);
+ if (unlikely(!ab))
+ return;
+ audit_log_format(ab, "auid=%u ses=%u op=%s",
+ from_kuid(&init_user_ns, audit_get_loginuid(current)),
+ audit_get_sessionid(current), op);
+ audit_log_format(ab, " path=");
+ audit_log_untrustedstring(ab, w->path);
+ audit_log_key(ab, r->filterkey);
+ audit_log_format(ab, " list=%d res=1", r->listnr);
+ audit_log_end(ab);
+ audit_log_container_info(context, "config", audit_get_containerid(current));
+ audit_free_context(context);
}
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-04-19 12:24:45 UTC
Permalink
Raw Message
Post by Paul Moore
Post by Richard Guy Briggs
Add container ID auxiliary record to mark, watch and tree rule
configuration standalone records.
---
kernel/audit_fsnotify.c | 5 ++++-
kernel/audit_tree.c | 5 ++++-
kernel/audit_watch.c | 33 +++++++++++++++++++--------------
3 files changed, 27 insertions(+), 16 deletions(-)
diff --git a/kernel/audit_fsnotify.c b/kernel/audit_fsnotify.c
index 52f368b..18c110d 100644
--- a/kernel/audit_fsnotify.c
+++ b/kernel/audit_fsnotify.c
@@ -124,10 +124,11 @@ static void audit_mark_log_rule_change(struct audit_fsnotify_mark *audit_mark, c
{
struct audit_buffer *ab;
struct audit_krule *rule = audit_mark->rule;
+ struct audit_context *context = audit_alloc_local();
if (!audit_enabled)
return;
Move the audit_alloc_local() after the audit_enabled check.
Already fixed in V3 as previously warned, by making all
AUDIT_CONFIG_CHANGE records SYSCALL auxiliary records.
Post by Paul Moore
Post by Richard Guy Briggs
- ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
+ ab = audit_log_start(context, GFP_NOFS, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return;
audit_log_format(ab, "auid=%u ses=%u op=%s",
@@ -138,6 +139,8 @@ static void audit_mark_log_rule_change(struct audit_fsnotify_mark *audit_mark, c
audit_log_key(ab, rule->filterkey);
audit_log_format(ab, " list=%d res=1", rule->listnr);
audit_log_end(ab);
+ audit_log_container_info(context, "config", audit_get_containerid(current));
+ audit_free_context(context);
}
void audit_remove_mark(struct audit_fsnotify_mark *audit_mark)
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 67e6956..7c085be 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -496,8 +496,9 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
static void audit_tree_log_remove_rule(struct audit_krule *rule)
{
struct audit_buffer *ab;
+ struct audit_context *context = audit_alloc_local();
Sort of independent of the audit container ID work, but shouldn't we
have an audit_enabled check here?
Same.
Post by Paul Moore
Post by Richard Guy Briggs
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_CONFIG_CHANGE);
if (unlikely(!ab))
return;
audit_log_format(ab, "op=remove_rule");
@@ -506,6 +507,8 @@ static void audit_tree_log_remove_rule(struct audit_krule *rule)
audit_log_key(ab, rule->filterkey);
audit_log_format(ab, " list=%d res=1", rule->listnr);
audit_log_end(ab);
+ audit_log_container_info(context, "config", audit_get_containerid(current));
+ audit_free_context(context);
}
static void kill_rules(struct audit_tree *tree)
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 9eb8b35..60d75a2 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -238,20 +238,25 @@ static struct audit_watch *audit_dupe_watch(struct audit_watch *old)
static void audit_watch_log_rule_change(struct audit_krule *r, struct audit_watch *w, char *op)
{
- if (audit_enabled) {
- struct audit_buffer *ab;
- ab = audit_log_start(NULL, GFP_NOFS, AUDIT_CONFIG_CHANGE);
- if (unlikely(!ab))
- return;
- audit_log_format(ab, "auid=%u ses=%u op=%s",
- from_kuid(&init_user_ns, audit_get_loginuid(current)),
- audit_get_sessionid(current), op);
- audit_log_format(ab, " path=");
- audit_log_untrustedstring(ab, w->path);
- audit_log_key(ab, r->filterkey);
- audit_log_format(ab, " list=%d res=1", r->listnr);
- audit_log_end(ab);
- }
+ struct audit_buffer *ab;
+ struct audit_context *context = audit_alloc_local();
+
+ if (!audit_enabled)
+ return;
Same as above, do the allocation after the audit_enabled check.
Same.
Post by Paul Moore
Post by Richard Guy Briggs
+ ab = audit_log_start(context, GFP_NOFS, AUDIT_CONFIG_CHANGE);
+ if (unlikely(!ab))
+ return;
+ audit_log_format(ab, "auid=%u ses=%u op=%s",
+ from_kuid(&init_user_ns, audit_get_loginuid(current)),
+ audit_get_sessionid(current), op);
+ audit_log_format(ab, " path=");
+ audit_log_untrustedstring(ab, w->path);
+ audit_log_key(ab, r->filterkey);
+ audit_log_format(ab, " list=%d res=1", r->listnr);
+ audit_log_end(ab);
+ audit_log_container_info(context, "config", audit_get_containerid(current));
+ audit_free_context(context);
}
--
paul moore
www.paul-moore.com
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Richard Guy Briggs
2018-03-16 09:00:33 UTC
Permalink
Raw Message
Standalone audit records have the timestamp and serial number generated
on the fly and as such are unique, making them standalone. This new
function audit_alloc_local() generates a local audit context that will
be used only for a standalone record and its auxiliary record(s). The
context is discarded immediately after the local associated records are
produced.

Signed-off-by: Richard Guy Briggs <***@redhat.com>
---
include/linux/audit.h | 8 ++++++++
kernel/auditsc.c | 20 +++++++++++++++++++-
2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/include/linux/audit.h b/include/linux/audit.h
index ed16bb6..c0b83cb 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -227,7 +227,9 @@ static inline int audit_log_container_info(struct audit_context *context,
/* These are defined in auditsc.c */
/* Public API */
extern int audit_alloc(struct task_struct *task);
+extern struct audit_context *audit_alloc_local(void);
extern void __audit_free(struct task_struct *task);
+extern void audit_free_context(struct audit_context *context);
extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1,
unsigned long a2, unsigned long a3);
extern void __audit_syscall_exit(int ret_success, long ret_value);
@@ -472,6 +474,12 @@ static inline int audit_alloc(struct task_struct *task)
{
return 0;
}
+static inline struct audit_context *audit_alloc_local(void)
+{
+ return NULL;
+}
+static inline void audit_free_context(struct audit_context *context)
+{ }
static inline void audit_free(struct task_struct *task)
{ }
static inline void audit_syscall_entry(int major, unsigned long a0,
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 2932ef1..7103d23 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -959,8 +959,26 @@ int audit_alloc(struct task_struct *tsk)
return 0;
}

-static inline void audit_free_context(struct audit_context *context)
+struct audit_context *audit_alloc_local(void)
{
+ struct audit_context *context;
+
+ if (!audit_ever_enabled)
+ return NULL; /* Return if not auditing. */
+
+ context = audit_alloc_context(AUDIT_RECORD_CONTEXT);
+ if (!context)
+ return NULL;
+ context->serial = audit_serial();
+ context->ctime = current_kernel_time64();
+ context->in_syscall = 1;
+ return context;
+}
+
+inline void audit_free_context(struct audit_context *context)
+{
+ if (!context)
+ return;
audit_free_names(context);
unroll_tree_refs(context, NULL, 0);
free_tree_refs(context);
--
1.8.3.1
Paul Moore
2018-04-19 00:39:12 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Standalone audit records have the timestamp and serial number generated
on the fly and as such are unique, making them standalone. This new
function audit_alloc_local() generates a local audit context that will
be used only for a standalone record and its auxiliary record(s). The
context is discarded immediately after the local associated records are
produced.
---
include/linux/audit.h | 8 ++++++++
kernel/auditsc.c | 20 +++++++++++++++++++-
2 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/include/linux/audit.h b/include/linux/audit.h
index ed16bb6..c0b83cb 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -227,7 +227,9 @@ static inline int audit_log_container_info(struct audit_context *context,
/* These are defined in auditsc.c */
/* Public API */
extern int audit_alloc(struct task_struct *task);
+extern struct audit_context *audit_alloc_local(void);
extern void __audit_free(struct task_struct *task);
+extern void audit_free_context(struct audit_context *context);
extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1,
unsigned long a2, unsigned long a3);
extern void __audit_syscall_exit(int ret_success, long ret_value);
@@ -472,6 +474,12 @@ static inline int audit_alloc(struct task_struct *task)
{
return 0;
}
+static inline struct audit_context *audit_alloc_local(void)
+{
+ return NULL;
+}
+static inline void audit_free_context(struct audit_context *context)
+{ }
static inline void audit_free(struct task_struct *task)
{ }
static inline void audit_syscall_entry(int major, unsigned long a0,
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 2932ef1..7103d23 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -959,8 +959,26 @@ int audit_alloc(struct task_struct *tsk)
return 0;
}
-static inline void audit_free_context(struct audit_context *context)
+struct audit_context *audit_alloc_local(void)
{
+ struct audit_context *context;
+
+ if (!audit_ever_enabled)
+ return NULL; /* Return if not auditing. */
+
+ context = audit_alloc_context(AUDIT_RECORD_CONTEXT);
+ if (!context)
+ return NULL;
+ context->serial = audit_serial();
+ context->ctime = current_kernel_time64();
+ context->in_syscall = 1;
+ return context;
+}
+
+inline void audit_free_context(struct audit_context *context)
+{
+ if (!context)
+ return;
audit_free_names(context);
unroll_tree_refs(context, NULL, 0);
free_tree_refs(context);
I'm reserving the option to comment on this idea further as I make my
way through the patchset, but audit_free_context() definitely
shouldn't be declared as an inline function.
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-04-20 01:23:46 UTC
Permalink
Raw Message
Post by Paul Moore
Post by Richard Guy Briggs
Standalone audit records have the timestamp and serial number generated
on the fly and as such are unique, making them standalone. This new
function audit_alloc_local() generates a local audit context that will
be used only for a standalone record and its auxiliary record(s). The
context is discarded immediately after the local associated records are
produced.
---
include/linux/audit.h | 8 ++++++++
kernel/auditsc.c | 20 +++++++++++++++++++-
2 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/include/linux/audit.h b/include/linux/audit.h
index ed16bb6..c0b83cb 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -227,7 +227,9 @@ static inline int audit_log_container_info(struct audit_context *context,
/* These are defined in auditsc.c */
/* Public API */
extern int audit_alloc(struct task_struct *task);
+extern struct audit_context *audit_alloc_local(void);
extern void __audit_free(struct task_struct *task);
+extern void audit_free_context(struct audit_context *context);
extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1,
unsigned long a2, unsigned long a3);
extern void __audit_syscall_exit(int ret_success, long ret_value);
@@ -472,6 +474,12 @@ static inline int audit_alloc(struct task_struct *task)
{
return 0;
}
+static inline struct audit_context *audit_alloc_local(void)
+{
+ return NULL;
+}
+static inline void audit_free_context(struct audit_context *context)
+{ }
static inline void audit_free(struct task_struct *task)
{ }
static inline void audit_syscall_entry(int major, unsigned long a0,
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 2932ef1..7103d23 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -959,8 +959,26 @@ int audit_alloc(struct task_struct *tsk)
return 0;
}
-static inline void audit_free_context(struct audit_context *context)
+struct audit_context *audit_alloc_local(void)
{
+ struct audit_context *context;
+
+ if (!audit_ever_enabled)
+ return NULL; /* Return if not auditing. */
+
+ context = audit_alloc_context(AUDIT_RECORD_CONTEXT);
+ if (!context)
+ return NULL;
+ context->serial = audit_serial();
+ context->ctime = current_kernel_time64();
+ context->in_syscall = 1;
+ return context;
+}
+
+inline void audit_free_context(struct audit_context *context)
+{
+ if (!context)
+ return;
audit_free_names(context);
unroll_tree_refs(context, NULL, 0);
free_tree_refs(context);
I'm reserving the option to comment on this idea further as I make my
way through the patchset, but audit_free_context() definitely
shouldn't be declared as an inline function.
Ok, I think I follow. When it wasn't exported, inline was fine, but now
that it has been exported, it should no longer be inlined, or should use
an intermediate function name to export so that local uses of it can
remain inline.
Post by Paul Moore
paul moore
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Paul Moore
2018-04-20 16:21:33 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Standalone audit records have the timestamp and serial number generated
on the fly and as such are unique, making them standalone. This new
function audit_alloc_local() generates a local audit context that will
be used only for a standalone record and its auxiliary record(s). The
context is discarded immediately after the local associated records are
produced.
---
include/linux/audit.h | 8 ++++++++
kernel/auditsc.c | 20 +++++++++++++++++++-
2 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/include/linux/audit.h b/include/linux/audit.h
index ed16bb6..c0b83cb 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -227,7 +227,9 @@ static inline int audit_log_container_info(struct audit_context *context,
/* These are defined in auditsc.c */
/* Public API */
extern int audit_alloc(struct task_struct *task);
+extern struct audit_context *audit_alloc_local(void);
extern void __audit_free(struct task_struct *task);
+extern void audit_free_context(struct audit_context *context);
extern void __audit_syscall_entry(int major, unsigned long a0, unsigned long a1,
unsigned long a2, unsigned long a3);
extern void __audit_syscall_exit(int ret_success, long ret_value);
@@ -472,6 +474,12 @@ static inline int audit_alloc(struct task_struct *task)
{
return 0;
}
+static inline struct audit_context *audit_alloc_local(void)
+{
+ return NULL;
+}
+static inline void audit_free_context(struct audit_context *context)
+{ }
static inline void audit_free(struct task_struct *task)
{ }
static inline void audit_syscall_entry(int major, unsigned long a0,
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 2932ef1..7103d23 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -959,8 +959,26 @@ int audit_alloc(struct task_struct *tsk)
return 0;
}
-static inline void audit_free_context(struct audit_context *context)
+struct audit_context *audit_alloc_local(void)
{
+ struct audit_context *context;
+
+ if (!audit_ever_enabled)
+ return NULL; /* Return if not auditing. */
+
+ context = audit_alloc_context(AUDIT_RECORD_CONTEXT);
+ if (!context)
+ return NULL;
+ context->serial = audit_serial();
+ context->ctime = current_kernel_time64();
+ context->in_syscall = 1;
+ return context;
+}
+
+inline void audit_free_context(struct audit_context *context)
+{
+ if (!context)
+ return;
audit_free_names(context);
unroll_tree_refs(context, NULL, 0);
free_tree_refs(context);
I'm reserving the option to comment on this idea further as I make my
way through the patchset, but audit_free_context() definitely
shouldn't be declared as an inline function.
Ok, I think I follow. When it wasn't exported, inline was fine, but now
that it has been exported, it should no longer be inlined ...
Pretty much. Based on a few comments I've seen by compiler folks over
the years, my current thinking is that we shouldn't worry about
explicitly inlining static functions in C files (header files are a
different story). The basic idea being that the compiler almost
always does a better job than us stupid developers.
Post by Richard Guy Briggs
... or should use
an intermediate function name to export so that local uses of it can
remain inline.
Possibly, but my guess is that the compiler could (will?) do that by
itself for code that lives in the same file.
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-03-16 09:00:37 UTC
Permalink
Raw Message
Add container ID auxiliary records to secure computing and abnormal end
standalone records.

Signed-off-by: Richard Guy Briggs <***@redhat.com>
---
kernel/auditsc.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7103d23..2f02ed9 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2571,6 +2571,7 @@ static void audit_log_task(struct audit_buffer *ab)
void audit_core_dumps(long signr)
{
struct audit_buffer *ab;
+ struct audit_context *context = audit_alloc_local();

if (!audit_enabled)
return;
@@ -2578,19 +2579,22 @@ void audit_core_dumps(long signr)
if (signr == SIGQUIT) /* don't care for those */
return;

- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_ANOM_ABEND);
if (unlikely(!ab))
return;
audit_log_task(ab);
audit_log_format(ab, " sig=%ld res=1", signr);
audit_log_end(ab);
+ audit_log_container_info(context, "abend", audit_get_containerid(current));
+ audit_free_context(context);
}

void __audit_seccomp(unsigned long syscall, long signr, int code)
{
struct audit_buffer *ab;
+ struct audit_context *context = audit_alloc_local();

- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_SECCOMP);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_SECCOMP);
if (unlikely(!ab))
return;
audit_log_task(ab);
@@ -2598,6 +2602,8 @@ void __audit_seccomp(unsigned long syscall, long signr, int code)
signr, syscall_get_arch(), syscall,
in_compat_syscall(), KSTK_EIP(current), code);
audit_log_end(ab);
+ audit_log_container_info(context, "seccomp", audit_get_containerid(current));
+ audit_free_context(context);
}

struct list_head *audit_killed_trees(void)
--
1.8.3.1
Paul Moore
2018-04-19 01:31:50 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Add container ID auxiliary records to secure computing and abnormal end
standalone records.
---
kernel/auditsc.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7103d23..2f02ed9 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2571,6 +2571,7 @@ static void audit_log_task(struct audit_buffer *ab)
void audit_core_dumps(long signr)
{
struct audit_buffer *ab;
+ struct audit_context *context = audit_alloc_local();
Looking quickly at do_coredump() I *believe* we can use current here.
Post by Richard Guy Briggs
if (!audit_enabled)
return;
@@ -2578,19 +2579,22 @@ void audit_core_dumps(long signr)
if (signr == SIGQUIT) /* don't care for those */
return;
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_ANOM_ABEND);
if (unlikely(!ab))
return;
audit_log_task(ab);
audit_log_format(ab, " sig=%ld res=1", signr);
audit_log_end(ab);
+ audit_log_container_info(context, "abend", audit_get_containerid(current));
+ audit_free_context(context);
}
void __audit_seccomp(unsigned long syscall, long signr, int code)
{
struct audit_buffer *ab;
+ struct audit_context *context = audit_alloc_local();
We can definitely use current here.
Post by Richard Guy Briggs
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_SECCOMP);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_SECCOMP);
if (unlikely(!ab))
return;
audit_log_task(ab);
@@ -2598,6 +2602,8 @@ void __audit_seccomp(unsigned long syscall, long signr, int code)
signr, syscall_get_arch(), syscall,
in_compat_syscall(), KSTK_EIP(current), code);
audit_log_end(ab);
+ audit_log_container_info(context, "seccomp", audit_get_containerid(current));
+ audit_free_context(context);
}
struct list_head *audit_killed_trees(void)
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-04-20 00:42:18 UTC
Permalink
Raw Message
Post by Paul Moore
Post by Richard Guy Briggs
Add container ID auxiliary records to secure computing and abnormal end
standalone records.
---
kernel/auditsc.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7103d23..2f02ed9 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2571,6 +2571,7 @@ static void audit_log_task(struct audit_buffer *ab)
void audit_core_dumps(long signr)
{
struct audit_buffer *ab;
+ struct audit_context *context = audit_alloc_local();
Looking quickly at do_coredump() I *believe* we can use current here.
Post by Richard Guy Briggs
if (!audit_enabled)
return;
@@ -2578,19 +2579,22 @@ void audit_core_dumps(long signr)
if (signr == SIGQUIT) /* don't care for those */
return;
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_ANOM_ABEND);
if (unlikely(!ab))
return;
audit_log_task(ab);
audit_log_format(ab, " sig=%ld res=1", signr);
audit_log_end(ab);
+ audit_log_container_info(context, "abend", audit_get_containerid(current));
+ audit_free_context(context);
}
void __audit_seccomp(unsigned long syscall, long signr, int code)
{
struct audit_buffer *ab;
+ struct audit_context *context = audit_alloc_local();
We can definitely use current here.
Ok, so both syscall aux records. That eliminates this patch from the
set, can go in independently.
Post by Paul Moore
Post by Richard Guy Briggs
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_SECCOMP);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_SECCOMP);
if (unlikely(!ab))
return;
audit_log_task(ab);
@@ -2598,6 +2602,8 @@ void __audit_seccomp(unsigned long syscall, long signr, int code)
signr, syscall_get_arch(), syscall,
in_compat_syscall(), KSTK_EIP(current), code);
audit_log_end(ab);
+ audit_log_container_info(context, "seccomp", audit_get_containerid(current));
+ audit_free_context(context);
}
struct list_head *audit_killed_trees(void)
--
paul moore
www.paul-moore.com
--
Linux-audit mailing list
https://www.redhat.com/mailman/listinfo/linux-audit
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Paul Moore
2018-04-20 16:11:09 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Add container ID auxiliary records to secure computing and abnormal end
standalone records.
---
kernel/auditsc.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 7103d23..2f02ed9 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2571,6 +2571,7 @@ static void audit_log_task(struct audit_buffer *ab)
void audit_core_dumps(long signr)
{
struct audit_buffer *ab;
+ struct audit_context *context = audit_alloc_local();
Looking quickly at do_coredump() I *believe* we can use current here.
Post by Richard Guy Briggs
if (!audit_enabled)
return;
@@ -2578,19 +2579,22 @@ void audit_core_dumps(long signr)
if (signr == SIGQUIT) /* don't care for those */
return;
- ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND);
+ ab = audit_log_start(context, GFP_KERNEL, AUDIT_ANOM_ABEND);
if (unlikely(!ab))
return;
audit_log_task(ab);
audit_log_format(ab, " sig=%ld res=1", signr);
audit_log_end(ab);
+ audit_log_container_info(context, "abend", audit_get_containerid(current));
+ audit_free_context(context);
}
void __audit_seccomp(unsigned long syscall, long signr, int code)
{
struct audit_buffer *ab;
+ struct audit_context *context = audit_alloc_local();
We can definitely use current here.
Ok, so both syscall aux records. That eliminates this patch from the
set, can go in independently.
Yep. It should help shrink the audit container ID patchset and
perhaps more importantly it should put some distance between the
connected-record debate and the audit container ID debate.

I understand we are going to need a "local" context for some things,
the network packets are probably the best example, but whenever
possible I would like to connect these records back to a task's
context.
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-03-16 09:00:38 UTC
Permalink
Raw Message
Audit events could happen in a network namespace outside of a task
context due to packets received from the net that trigger an auditing
rule prior to being associated with a running task. The network
namespace could be in use by multiple containers by association to the
tasks in that network namespace. We still want a way to attribute
these events to any potential containers. Keep a list per network
namespace to track these container identifiers.

Add/increment the container identifier on:
- initial setting of the container id via /proc
- clone/fork call that inherits a container identifier
- unshare call that inherits a container identifier
- setns call that inherits a container identifier
Delete/decrement the container identifier on:
- an inherited container id dropped when child set
- process exit
- unshare call that drops a net namespace
- setns call that drops a net namespace

See: https://github.com/linux-audit/audit-kernel/issues/32
See: https://github.com/linux-audit/audit-testsuite/issues/64
Signed-off-by: Richard Guy Briggs <***@redhat.com>
---
include/linux/audit.h | 7 +++++++
include/net/net_namespace.h | 12 ++++++++++++
kernel/auditsc.c | 9 ++++++---
kernel/nsproxy.c | 6 ++++++
net/core/net_namespace.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 76 insertions(+), 3 deletions(-)

diff --git a/include/linux/audit.h b/include/linux/audit.h
index c0b83cb..d9afb7d 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -26,6 +26,7 @@
#include <linux/sched.h>
#include <linux/ptrace.h>
#include <uapi/linux/audit.h>
+#include <linux/refcount.h>

#define AUDIT_INO_UNSET ((unsigned long)-1)
#define AUDIT_DEV_UNSET ((dev_t)-1)
@@ -88,6 +89,12 @@ struct audit_field {
u32 op;
};

+struct audit_containerid {
+ struct list_head list;
+ u64 id;
+ refcount_t refcount;
+};
+
extern int is_audit_feature_set(int which);

extern int __init audit_register_class(int class, unsigned *list);
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 0490084..343a428 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -33,6 +33,7 @@
#include <linux/ns_common.h>
#include <linux/idr.h>
#include <linux/skbuff.h>
+#include <linux/audit.h>

struct user_namespace;
struct proc_dir_entry;
@@ -150,6 +151,7 @@ struct net {
#endif
struct sock *diag_nlsk;
atomic_t fnhe_genid;
+ struct list_head audit_containerid;
} __randomize_layout;

#include <linux/seq_file_net.h>
@@ -301,6 +303,16 @@ static inline struct net *read_pnet(const possible_net_t *pnet)
#define __net_initconst __initconst
#endif

+#ifdef CONFIG_NET_NS
+void net_add_audit_containerid(struct net *net, u64 containerid);
+void net_del_audit_containerid(struct net *net, u64 containerid);
+#else
+static inline void net_add_audit_containerid(struct net *, u64)
+{ }
+static inline void net_del_audit_containerid(struct net *, u64)
+{ }
+#endif
+
int peernet2id_alloc(struct net *net, struct net *peer);
int peernet2id(struct net *net, struct net *peer);
bool peernet_has_id(struct net *net, struct net *peer);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 2f02ed9..208da962 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -75,6 +75,7 @@
#include <linux/uaccess.h>
#include <linux/fsnotify_backend.h>
#include <uapi/linux/limits.h>
+#include <net/net_namespace.h>

#include "audit.h"

@@ -2175,16 +2176,18 @@ static void audit_log_set_containerid(struct task_struct *task, u64 oldcontainer
*/
int audit_set_containerid(struct task_struct *task, u64 containerid)
{
- u64 oldcontainerid;
+ u64 oldcontainerid = audit_get_containerid(task);
int rc;
-
- oldcontainerid = audit_get_containerid(task);
+ struct net *net = task->nsproxy->net_ns;

rc = audit_set_containerid_perm(task, containerid);
if (!rc) {
+ if (cid_valid(oldcontainerid))
+ net_del_audit_containerid(net, oldcontainerid);
task_lock(task);
task->containerid = containerid;
task_unlock(task);
+ net_add_audit_containerid(net, containerid);
}

audit_log_set_containerid(task, oldcontainerid, containerid, rc);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index f6c5d33..d9f1090 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -140,6 +140,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
struct nsproxy *old_ns = tsk->nsproxy;
struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
struct nsproxy *new_ns;
+ u64 containerid = audit_get_containerid(tsk);

if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
CLONE_NEWPID | CLONE_NEWNET |
@@ -167,6 +168,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
return PTR_ERR(new_ns);

tsk->nsproxy = new_ns;
+ net_add_audit_containerid(new_ns->net_ns, containerid);
return 0;
}

@@ -217,6 +219,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
{
struct nsproxy *ns;
+ u64 containerid = audit_get_containerid(p);

might_sleep();

@@ -224,6 +227,9 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
ns = p->nsproxy;
p->nsproxy = new;
task_unlock(p);
+ net_del_audit_containerid(ns->net_ns, containerid);
+ if (new)
+ net_add_audit_containerid(new->net_ns, containerid);

if (ns && atomic_dec_and_test(&ns->count))
free_nsproxy(ns);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 60a71be7..ae30d33 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -22,6 +22,7 @@
#include <net/netlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <linux/nsproxy.h>

/*
* Our network namespace constructor/destructor lists
@@ -290,6 +291,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
net->user_ns = user_ns;
idr_init(&net->netns_ids);
spin_lock_init(&net->nsid_lock);
+ INIT_LIST_HEAD(&net->audit_containerid);

list_for_each_entry(ops, &pernet_list, list) {
error = ops_init(ops, net);
@@ -1067,6 +1069,49 @@ void unregister_pernet_device(struct pernet_operations *ops)
EXPORT_SYMBOL_GPL(unregister_pernet_device);

#ifdef CONFIG_NET_NS
+void net_add_audit_containerid(struct net *net, u64 containerid)
+{
+ struct audit_containerid *cont;
+
+ if (!cid_valid(containerid))
+ return;
+ if (!list_empty(&net->audit_containerid))
+ list_for_each_entry(cont, &net->audit_containerid, list)
+ if (cont->id == containerid) {
+ refcount_inc(&cont->refcount);
+ return;
+ }
+ cont = kmalloc(sizeof(struct audit_containerid), GFP_KERNEL);
+ if (!cont)
+ return;
+ INIT_LIST_HEAD(&cont->list);
+ cont->id = containerid;
+ refcount_set(&cont->refcount, 1);
+ list_add(&cont->list, &net->audit_containerid);
+}
+
+void net_del_audit_containerid(struct net *net, u64 containerid)
+{
+ struct audit_containerid *cont = NULL;
+ int found = 0;
+
+ if (!cid_valid(containerid))
+ return;
+ if (!list_empty(&net->audit_containerid))
+ list_for_each_entry(cont, &net->audit_containerid, list)
+ if (cont->id == containerid) {
+ found = 1;
+ break;
+ }
+ if (!found)
+ return;
+ list_del(&cont->list);
+ if (refcount_dec_and_test(&cont->refcount))
+ kfree(cont);
+}
+#endif
+
+#ifdef CONFIG_NET_NS
static struct ns_common *netns_get(struct task_struct *task)
{
struct net *net = NULL;
--
1.8.3.1
Paul Moore
2018-04-19 01:46:49 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Audit events could happen in a network namespace outside of a task
context due to packets received from the net that trigger an auditing
rule prior to being associated with a running task. The network
namespace could be in use by multiple containers by association to the
tasks in that network namespace. We still want a way to attribute
these events to any potential containers. Keep a list per network
namespace to track these container identifiers.
- initial setting of the container id via /proc
- clone/fork call that inherits a container identifier
- unshare call that inherits a container identifier
- setns call that inherits a container identifier
- an inherited container id dropped when child set
- process exit
- unshare call that drops a net namespace
- setns call that drops a net namespace
See: https://github.com/linux-audit/audit-kernel/issues/32
See: https://github.com/linux-audit/audit-testsuite/issues/64
---
include/linux/audit.h | 7 +++++++
include/net/net_namespace.h | 12 ++++++++++++
kernel/auditsc.c | 9 ++++++---
kernel/nsproxy.c | 6 ++++++
net/core/net_namespace.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 76 insertions(+), 3 deletions(-)
...
Post by Richard Guy Briggs
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 0490084..343a428 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -33,6 +33,7 @@
#include <linux/ns_common.h>
#include <linux/idr.h>
#include <linux/skbuff.h>
+#include <linux/audit.h>
struct user_namespace;
struct proc_dir_entry;
@@ -150,6 +151,7 @@ struct net {
#endif
struct sock *diag_nlsk;
atomic_t fnhe_genid;
+ struct list_head audit_containerid;
} __randomize_layout;
We talked about this briefly off-list, you should be using audit_net
and the net_generic mechanism instead of this.
Post by Richard Guy Briggs
#include <linux/seq_file_net.h>
@@ -301,6 +303,16 @@ static inline struct net *read_pnet(const possible_net_t *pnet)
#define __net_initconst __initconst
#endif
+#ifdef CONFIG_NET_NS
+void net_add_audit_containerid(struct net *net, u64 containerid);
+void net_del_audit_containerid(struct net *net, u64 containerid);
+#else
+static inline void net_add_audit_containerid(struct net *, u64)
+{ }
+static inline void net_del_audit_containerid(struct net *, u64)
+{ }
+#endif
+
int peernet2id_alloc(struct net *net, struct net *peer);
int peernet2id(struct net *net, struct net *peer);
bool peernet_has_id(struct net *net, struct net *peer);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 2f02ed9..208da962 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -75,6 +75,7 @@
#include <linux/uaccess.h>
#include <linux/fsnotify_backend.h>
#include <uapi/linux/limits.h>
+#include <net/net_namespace.h>
#include "audit.h"
@@ -2175,16 +2176,18 @@ static void audit_log_set_containerid(struct task_struct *task, u64 oldcontainer
*/
int audit_set_containerid(struct task_struct *task, u64 containerid)
{
- u64 oldcontainerid;
+ u64 oldcontainerid = audit_get_containerid(task);
int rc;
-
- oldcontainerid = audit_get_containerid(task);
+ struct net *net = task->nsproxy->net_ns;
rc = audit_set_containerid_perm(task, containerid);
if (!rc) {
+ if (cid_valid(oldcontainerid))
+ net_del_audit_containerid(net, oldcontainerid);
Using audit_net we can handle this internal to audit, which is a Good Thing.
Post by Richard Guy Briggs
task_lock(task);
task->containerid = containerid;
task_unlock(task);
+ net_add_audit_containerid(net, containerid);
Same.
Post by Richard Guy Briggs
}
audit_log_set_containerid(task, oldcontainerid, containerid, rc);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index f6c5d33..d9f1090 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -140,6 +140,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
struct nsproxy *old_ns = tsk->nsproxy;
struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
struct nsproxy *new_ns;
+ u64 containerid = audit_get_containerid(tsk);
if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
CLONE_NEWPID | CLONE_NEWNET |
@@ -167,6 +168,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
return PTR_ERR(new_ns);
tsk->nsproxy = new_ns;
+ net_add_audit_containerid(new_ns->net_ns, containerid);
return 0;
}
Hopefully we can handle this in audit_net_init(), we just need to
figure out where we can get the correct task_struct for the audit
container ID (some backpointer in the net struct?).
Post by Richard Guy Briggs
@@ -217,6 +219,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
{
struct nsproxy *ns;
+ u64 containerid = audit_get_containerid(p);
might_sleep();
@@ -224,6 +227,9 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
ns = p->nsproxy;
p->nsproxy = new;
task_unlock(p);
+ net_del_audit_containerid(ns->net_ns, containerid);
+ if (new)
+ net_add_audit_containerid(new->net_ns, containerid);
Okay, we might need a hook here for switching namespaces, but I would
much rather it be a generic audit hook that calls directly into audit.
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-04-20 20:02:26 UTC
Permalink
Raw Message
Post by Paul Moore
Post by Richard Guy Briggs
Audit events could happen in a network namespace outside of a task
context due to packets received from the net that trigger an auditing
rule prior to being associated with a running task. The network
namespace could be in use by multiple containers by association to the
tasks in that network namespace. We still want a way to attribute
these events to any potential containers. Keep a list per network
namespace to track these container identifiers.
- initial setting of the container id via /proc
- clone/fork call that inherits a container identifier
- unshare call that inherits a container identifier
- setns call that inherits a container identifier
- an inherited container id dropped when child set
- process exit
- unshare call that drops a net namespace
- setns call that drops a net namespace
See: https://github.com/linux-audit/audit-kernel/issues/32
See: https://github.com/linux-audit/audit-testsuite/issues/64
---
include/linux/audit.h | 7 +++++++
include/net/net_namespace.h | 12 ++++++++++++
kernel/auditsc.c | 9 ++++++---
kernel/nsproxy.c | 6 ++++++
net/core/net_namespace.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 76 insertions(+), 3 deletions(-)
...
Post by Richard Guy Briggs
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 0490084..343a428 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -33,6 +33,7 @@
#include <linux/ns_common.h>
#include <linux/idr.h>
#include <linux/skbuff.h>
+#include <linux/audit.h>
struct user_namespace;
struct proc_dir_entry;
@@ -150,6 +151,7 @@ struct net {
#endif
struct sock *diag_nlsk;
atomic_t fnhe_genid;
+ struct list_head audit_containerid;
} __randomize_layout;
We talked about this briefly off-list, you should be using audit_net
and the net_generic mechanism instead of this.
Post by Richard Guy Briggs
#include <linux/seq_file_net.h>
@@ -301,6 +303,16 @@ static inline struct net *read_pnet(const possible_net_t *pnet)
#define __net_initconst __initconst
#endif
+#ifdef CONFIG_NET_NS
+void net_add_audit_containerid(struct net *net, u64 containerid);
+void net_del_audit_containerid(struct net *net, u64 containerid);
+#else
+static inline void net_add_audit_containerid(struct net *, u64)
+{ }
+static inline void net_del_audit_containerid(struct net *, u64)
+{ }
+#endif
+
int peernet2id_alloc(struct net *net, struct net *peer);
int peernet2id(struct net *net, struct net *peer);
bool peernet_has_id(struct net *net, struct net *peer);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 2f02ed9..208da962 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -75,6 +75,7 @@
#include <linux/uaccess.h>
#include <linux/fsnotify_backend.h>
#include <uapi/linux/limits.h>
+#include <net/net_namespace.h>
#include "audit.h"
@@ -2175,16 +2176,18 @@ static void audit_log_set_containerid(struct task_struct *task, u64 oldcontainer
*/
int audit_set_containerid(struct task_struct *task, u64 containerid)
{
- u64 oldcontainerid;
+ u64 oldcontainerid = audit_get_containerid(task);
int rc;
-
- oldcontainerid = audit_get_containerid(task);
+ struct net *net = task->nsproxy->net_ns;
rc = audit_set_containerid_perm(task, containerid);
if (!rc) {
+ if (cid_valid(oldcontainerid))
+ net_del_audit_containerid(net, oldcontainerid);
Using audit_net we can handle this internal to audit, which is a Good Thing.
No problem, done.
Post by Paul Moore
Post by Richard Guy Briggs
task_lock(task);
task->containerid = containerid;
task_unlock(task);
+ net_add_audit_containerid(net, containerid);
Same.
Post by Richard Guy Briggs
}
audit_log_set_containerid(task, oldcontainerid, containerid, rc);
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index f6c5d33..d9f1090 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -140,6 +140,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
struct nsproxy *old_ns = tsk->nsproxy;
struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
struct nsproxy *new_ns;
+ u64 containerid = audit_get_containerid(tsk);
if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
CLONE_NEWPID | CLONE_NEWNET |
@@ -167,6 +168,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
return PTR_ERR(new_ns);
tsk->nsproxy = new_ns;
+ net_add_audit_containerid(new_ns->net_ns, containerid);
return 0;
}
Hopefully we can handle this in audit_net_init(), we just need to
figure out where we can get the correct task_struct for the audit
container ID (some backpointer in the net struct?).
I don't follow. This needs to happen on every task startup.
audit_net_init() is only called when a new network namespace starts up.
Post by Paul Moore
Post by Richard Guy Briggs
@@ -217,6 +219,7 @@ int unshare_nsproxy_namespaces(unsigned long unshare_flags,
void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
{
struct nsproxy *ns;
+ u64 containerid = audit_get_containerid(p);
might_sleep();
@@ -224,6 +227,9 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
ns = p->nsproxy;
p->nsproxy = new;
task_unlock(p);
+ net_del_audit_containerid(ns->net_ns, containerid);
+ if (new)
+ net_add_audit_containerid(new->net_ns, containerid);
Okay, we might need a hook here for switching namespaces, but I would
much rather it be a generic audit hook that calls directly into audit.
Trivial, done.
Post by Paul Moore
paul moore
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Paul Moore
2018-04-20 20:22:18 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Audit events could happen in a network namespace outside of a task
context due to packets received from the net that trigger an auditing
rule prior to being associated with a running task. The network
namespace could be in use by multiple containers by association to the
tasks in that network namespace. We still want a way to attribute
these events to any potential containers. Keep a list per network
namespace to track these container identifiers.
- initial setting of the container id via /proc
- clone/fork call that inherits a container identifier
- unshare call that inherits a container identifier
- setns call that inherits a container identifier
- an inherited container id dropped when child set
- process exit
- unshare call that drops a net namespace
- setns call that drops a net namespace
See: https://github.com/linux-audit/audit-kernel/issues/32
See: https://github.com/linux-audit/audit-testsuite/issues/64
---
include/linux/audit.h | 7 +++++++
include/net/net_namespace.h | 12 ++++++++++++
kernel/auditsc.c | 9 ++++++---
kernel/nsproxy.c | 6 ++++++
net/core/net_namespace.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 76 insertions(+), 3 deletions(-)
...
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index f6c5d33..d9f1090 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -140,6 +140,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
struct nsproxy *old_ns = tsk->nsproxy;
struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
struct nsproxy *new_ns;
+ u64 containerid = audit_get_containerid(tsk);
if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
CLONE_NEWPID | CLONE_NEWNET |
@@ -167,6 +168,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
return PTR_ERR(new_ns);
tsk->nsproxy = new_ns;
+ net_add_audit_containerid(new_ns->net_ns, containerid);
return 0;
}
Hopefully we can handle this in audit_net_init(), we just need to
figure out where we can get the correct task_struct for the audit
container ID (some backpointer in the net struct?).
I don't follow. This needs to happen on every task startup.
audit_net_init() is only called when a new network namespace starts up.
Yep, sorry, my mistake. I must have confused myself when I was
looking at the code.

I'm thinking out loud here, bear with me ...

Assuming we move the netns/audit-container-ID tracking to audit_net,
and considering we already have an audit hook in copy_process() (it
calls audit_alloc()), would this be better handled by the
copy_process() hook? This ignores naming, audit_alloc() reuse, etc.;
those can be easily fixed. I'm just thinking of ways to limit our
impact on the core kernel and leverage our existing interaction
points.
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-04-20 20:42:25 UTC
Permalink
Raw Message
Post by Paul Moore
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Audit events could happen in a network namespace outside of a task
context due to packets received from the net that trigger an auditing
rule prior to being associated with a running task. The network
namespace could be in use by multiple containers by association to the
tasks in that network namespace. We still want a way to attribute
these events to any potential containers. Keep a list per network
namespace to track these container identifiers.
- initial setting of the container id via /proc
- clone/fork call that inherits a container identifier
- unshare call that inherits a container identifier
- setns call that inherits a container identifier
- an inherited container id dropped when child set
- process exit
- unshare call that drops a net namespace
- setns call that drops a net namespace
See: https://github.com/linux-audit/audit-kernel/issues/32
See: https://github.com/linux-audit/audit-testsuite/issues/64
---
include/linux/audit.h | 7 +++++++
include/net/net_namespace.h | 12 ++++++++++++
kernel/auditsc.c | 9 ++++++---
kernel/nsproxy.c | 6 ++++++
net/core/net_namespace.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 76 insertions(+), 3 deletions(-)
...
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index f6c5d33..d9f1090 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -140,6 +140,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
struct nsproxy *old_ns = tsk->nsproxy;
struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
struct nsproxy *new_ns;
+ u64 containerid = audit_get_containerid(tsk);
if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
CLONE_NEWPID | CLONE_NEWNET |
@@ -167,6 +168,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
return PTR_ERR(new_ns);
tsk->nsproxy = new_ns;
+ net_add_audit_containerid(new_ns->net_ns, containerid);
return 0;
}
Hopefully we can handle this in audit_net_init(), we just need to
figure out where we can get the correct task_struct for the audit
container ID (some backpointer in the net struct?).
I don't follow. This needs to happen on every task startup.
audit_net_init() is only called when a new network namespace starts up.
Yep, sorry, my mistake. I must have confused myself when I was
looking at the code.
I'm thinking out loud here, bear with me ...
Assuming we move the netns/audit-container-ID tracking to audit_net,
and considering we already have an audit hook in copy_process() (it
calls audit_alloc()), would this be better handled by the
copy_process() hook? This ignores naming, audit_alloc() reuse, etc.;
those can be easily fixed. I'm just thinking of ways to limit our
impact on the core kernel and leverage our existing interaction
points.
The new namespace hasn't been cloned yet and this is the only function
where we have access to both namespaces, so I don't see how that could
work...
Post by Paul Moore
paul moore
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Paul Moore
2018-04-21 12:10:46 UTC
Permalink
Raw Message
On April 20, 2018 4:48:34 PM Richard Guy Briggs <***@redhat.com> wrote:
On 2018-04-20 16:22, Paul Moore wrote:
On Fri, Apr 20, 2018 at 4:02 PM, Richard Guy Briggs <***@redhat.com> wrote:
On 2018-04-18 21:46, Paul Moore wrote:
On Fri, Mar 16, 2018 at 5:00 AM, Richard Guy Briggs <***@redhat.com> wrote:
Audit events could happen in a network namespace outside of a task
context due to packets received from the net that trigger an auditing
rule prior to being associated with a running task. The network
namespace could be in use by multiple containers by association to the
tasks in that network namespace. We still want a way to attribute
these events to any potential containers. Keep a list per network
namespace to track these container identifiers.

Add/increment the container identifier on:
- initial setting of the container id via /proc
- clone/fork call that inherits a container identifier
- unshare call that inherits a container identifier
- setns call that inherits a container identifier
Delete/decrement the container identifier on:
- an inherited container id dropped when child set
- process exit
- unshare call that drops a net namespace
- setns call that drops a net namespace

See: https://github.com/linux-audit/audit-kernel/issues/32
See: https://github.com/linux-audit/audit-testsuite/issues/64
Signed-off-by: Richard Guy Briggs <***@redhat.com>
---
include/linux/audit.h | 7 +++++++
include/net/net_namespace.h | 12 ++++++++++++
kernel/auditsc.c | 9 ++++++---
kernel/nsproxy.c | 6 ++++++
net/core/net_namespace.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 76 insertions(+), 3 deletions(-)

...

diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
index f6c5d33..d9f1090 100644
--- a/kernel/nsproxy.c
+++ b/kernel/nsproxy.c
@@ -140,6 +140,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
struct nsproxy *old_ns = tsk->nsproxy;
struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
struct nsproxy *new_ns;
+ u64 containerid = audit_get_containerid(tsk);

if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
CLONE_NEWPID | CLONE_NEWNET |
@@ -167,6 +168,7 @@ int copy_namespaces(unsigned long flags, struct task_struct *tsk)
return PTR_ERR(new_ns);

tsk->nsproxy = new_ns;
+ net_add_audit_containerid(new_ns->net_ns, containerid);
return 0;
}

Hopefully we can handle this in audit_net_init(), we just need to
figure out where we can get the correct task_struct for the audit
container ID (some backpointer in the net struct?).

I don't follow. This needs to happen on every task startup.
audit_net_init() is only called when a new network namespace starts up.

Yep, sorry, my mistake. I must have confused myself when I was
looking at the code.

I'm thinking out loud here, bear with me ...

Assuming we move the netns/audit-container-ID tracking to audit_net,
and considering we already have an audit hook in copy_process() (it
calls audit_alloc()), would this be better handled by the
copy_process() hook? This ignores naming, audit_alloc() reuse, etc.;
those can be easily fixed. I'm just thinking of ways to limit our
impact on the core kernel and leverage our existing interaction
points.

The new namespace hasn't been cloned yet and this is the only function
where we have access to both namespaces, so I don't see how that could
work...

I'll take another, closer look, with v3.


paul moore

- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635


--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-03-16 09:00:39 UTC
Permalink
Raw Message
Add container ID auxiliary record(s) to NETFILTER_PKT event standalone
records. Iterate through all potential container IDs associated with a
network namespace.

Signed-off-by: Richard Guy Briggs <***@redhat.com>
---
kernel/audit.c | 1 +
kernel/auditsc.c | 2 ++
net/netfilter/xt_AUDIT.c | 15 ++++++++++++++-
3 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/kernel/audit.c b/kernel/audit.c
index 08662b4..3c77e47 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -2102,6 +2102,7 @@ int audit_log_container_info(struct audit_context *context,
audit_log_end(ab);
return 0;
}
+EXPORT_SYMBOL(audit_log_container_info);

void audit_log_key(struct audit_buffer *ab, char *key)
{
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 208da962..af68d01 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -975,6 +975,7 @@ struct audit_context *audit_alloc_local(void)
context->in_syscall = 1;
return context;
}
+EXPORT_SYMBOL(audit_alloc_local);

inline void audit_free_context(struct audit_context *context)
{
@@ -989,6 +990,7 @@ inline void audit_free_context(struct audit_context *context)
audit_proctitle_free(context);
kfree(context);
}
+EXPORT_SYMBOL(audit_free_context);

static int audit_log_pid_context(struct audit_context *context, pid_t pid,
kuid_t auid, kuid_t uid, unsigned int sessionid,
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
index c502419..edaa456 100644
--- a/net/netfilter/xt_AUDIT.c
+++ b/net/netfilter/xt_AUDIT.c
@@ -71,10 +71,14 @@ static bool audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
{
struct audit_buffer *ab;
int fam = -1;
+ struct audit_context *context = audit_alloc_local();
+ struct audit_containerid *cont;
+ int i = 0;
+ struct net *net;

if (audit_enabled == 0)
goto errout;
- ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
+ ab = audit_log_start(context, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
if (ab == NULL)
goto errout;

@@ -104,7 +108,16 @@ static bool audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)

audit_log_end(ab);

+ net = sock_net(NETLINK_CB(skb).sk);
+ list_for_each_entry(cont, &net->audit_containerid, list) {
+ char buf[14];
+
+ sprintf(buf, "net%u", i++);
+ audit_log_container_info(context, buf, cont->id);
+ }
+
errout:
+ audit_free_context(context);
return XT_CONTINUE;
}
--
1.8.3.1
Paul Moore
2018-04-19 02:10:10 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Add container ID auxiliary record(s) to NETFILTER_PKT event standalone
records. Iterate through all potential container IDs associated with a
network namespace.
---
kernel/audit.c | 1 +
kernel/auditsc.c | 2 ++
net/netfilter/xt_AUDIT.c | 15 ++++++++++++++-
3 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index 08662b4..3c77e47 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -2102,6 +2102,7 @@ int audit_log_container_info(struct audit_context *context,
audit_log_end(ab);
return 0;
}
+EXPORT_SYMBOL(audit_log_container_info);
void audit_log_key(struct audit_buffer *ab, char *key)
{
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 208da962..af68d01 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -975,6 +975,7 @@ struct audit_context *audit_alloc_local(void)
context->in_syscall = 1;
return context;
}
+EXPORT_SYMBOL(audit_alloc_local);
inline void audit_free_context(struct audit_context *context)
{
@@ -989,6 +990,7 @@ inline void audit_free_context(struct audit_context *context)
audit_proctitle_free(context);
kfree(context);
}
+EXPORT_SYMBOL(audit_free_context);
static int audit_log_pid_context(struct audit_context *context, pid_t pid,
kuid_t auid, kuid_t uid, unsigned int sessionid,
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
index c502419..edaa456 100644
--- a/net/netfilter/xt_AUDIT.c
+++ b/net/netfilter/xt_AUDIT.c
@@ -71,10 +71,14 @@ static bool audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
{
struct audit_buffer *ab;
int fam = -1;
+ struct audit_context *context = audit_alloc_local();
+ struct audit_containerid *cont;
+ int i = 0;
+ struct net *net;
if (audit_enabled == 0)
goto errout;
Do I need to say it? I probably should ... the allocation should
happen after the audit_enabled check.
Post by Richard Guy Briggs
- ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
+ ab = audit_log_start(context, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
if (ab == NULL)
goto errout;
@@ -104,7 +108,16 @@ static bool audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
audit_log_end(ab);
+ net = sock_net(NETLINK_CB(skb).sk);
+ list_for_each_entry(cont, &net->audit_containerid, list) {
+ char buf[14];
+
+ sprintf(buf, "net%u", i++);
+ audit_log_container_info(context, buf, cont->id);
+ }
It seems like this could (should?) be hidden inside an audit function,
e.g. audit_log_net_containers() or something like that.
Post by Richard Guy Briggs
+ audit_free_context(context);
return XT_CONTINUE;
}
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-04-19 12:45:51 UTC
Permalink
Raw Message
Post by Paul Moore
Post by Richard Guy Briggs
Add container ID auxiliary record(s) to NETFILTER_PKT event standalone
records. Iterate through all potential container IDs associated with a
network namespace.
---
kernel/audit.c | 1 +
kernel/auditsc.c | 2 ++
net/netfilter/xt_AUDIT.c | 15 ++++++++++++++-
3 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index 08662b4..3c77e47 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -2102,6 +2102,7 @@ int audit_log_container_info(struct audit_context *context,
audit_log_end(ab);
return 0;
}
+EXPORT_SYMBOL(audit_log_container_info);
void audit_log_key(struct audit_buffer *ab, char *key)
{
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 208da962..af68d01 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -975,6 +975,7 @@ struct audit_context *audit_alloc_local(void)
context->in_syscall = 1;
return context;
}
+EXPORT_SYMBOL(audit_alloc_local);
inline void audit_free_context(struct audit_context *context)
{
@@ -989,6 +990,7 @@ inline void audit_free_context(struct audit_context *context)
audit_proctitle_free(context);
kfree(context);
}
+EXPORT_SYMBOL(audit_free_context);
static int audit_log_pid_context(struct audit_context *context, pid_t pid,
kuid_t auid, kuid_t uid, unsigned int sessionid,
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
index c502419..edaa456 100644
--- a/net/netfilter/xt_AUDIT.c
+++ b/net/netfilter/xt_AUDIT.c
@@ -71,10 +71,14 @@ static bool audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
{
struct audit_buffer *ab;
int fam = -1;
+ struct audit_context *context = audit_alloc_local();
+ struct audit_containerid *cont;
+ int i = 0;
+ struct net *net;
if (audit_enabled == 0)
goto errout;
Do I need to say it? I probably should ... the allocation should
happen after the audit_enabled check.
Already fixed in V3 in my tree a couple of weeks ago...
More timely review please?
Post by Paul Moore
Post by Richard Guy Briggs
- ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
+ ab = audit_log_start(context, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
if (ab == NULL)
goto errout;
@@ -104,7 +108,16 @@ static bool audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
audit_log_end(ab);
+ net = sock_net(NETLINK_CB(skb).sk);
+ list_for_each_entry(cont, &net->audit_containerid, list) {
+ char buf[14];
+
+ sprintf(buf, "net%u", i++);
+ audit_log_container_info(context, buf, cont->id);
+ }
It seems like this could (should?) be hidden inside an audit function,
e.g. audit_log_net_containers() or something like that.
Perhaps... It was open-coded since at this point there are no other
users. That'll make this tidier though.
Post by Paul Moore
Post by Richard Guy Briggs
+ audit_free_context(context);
return XT_CONTINUE;
}
--
paul moore
www.paul-moore.com
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Paul Moore
2018-04-19 13:13:17 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Add container ID auxiliary record(s) to NETFILTER_PKT event standalone
records. Iterate through all potential container IDs associated with a
network namespace.
---
kernel/audit.c | 1 +
kernel/auditsc.c | 2 ++
net/netfilter/xt_AUDIT.c | 15 ++++++++++++++-
3 files changed, 17 insertions(+), 1 deletion(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index 08662b4..3c77e47 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -2102,6 +2102,7 @@ int audit_log_container_info(struct audit_context *context,
audit_log_end(ab);
return 0;
}
+EXPORT_SYMBOL(audit_log_container_info);
void audit_log_key(struct audit_buffer *ab, char *key)
{
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 208da962..af68d01 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -975,6 +975,7 @@ struct audit_context *audit_alloc_local(void)
context->in_syscall = 1;
return context;
}
+EXPORT_SYMBOL(audit_alloc_local);
inline void audit_free_context(struct audit_context *context)
{
@@ -989,6 +990,7 @@ inline void audit_free_context(struct audit_context *context)
audit_proctitle_free(context);
kfree(context);
}
+EXPORT_SYMBOL(audit_free_context);
static int audit_log_pid_context(struct audit_context *context, pid_t pid,
kuid_t auid, kuid_t uid, unsigned int sessionid,
diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c
index c502419..edaa456 100644
--- a/net/netfilter/xt_AUDIT.c
+++ b/net/netfilter/xt_AUDIT.c
@@ -71,10 +71,14 @@ static bool audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
{
struct audit_buffer *ab;
int fam = -1;
+ struct audit_context *context = audit_alloc_local();
+ struct audit_containerid *cont;
+ int i = 0;
+ struct net *net;
if (audit_enabled == 0)
goto errout;
Do I need to say it? I probably should ... the allocation should
happen after the audit_enabled check.
Already fixed in V3 in my tree a couple of weeks ago...
... which you never posted, at least not anywhere I've seen. Which
effectively means I wasted a good chunk of time reviewing this code
late last night. Awesome.
Post by Richard Guy Briggs
More timely review please?
More patience on your part?
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
- ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
+ ab = audit_log_start(context, GFP_ATOMIC, AUDIT_NETFILTER_PKT);
if (ab == NULL)
goto errout;
@@ -104,7 +108,16 @@ static bool audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)
audit_log_end(ab);
+ net = sock_net(NETLINK_CB(skb).sk);
+ list_for_each_entry(cont, &net->audit_containerid, list) {
+ char buf[14];
+
+ sprintf(buf, "net%u", i++);
+ audit_log_container_info(context, buf, cont->id);
+ }
It seems like this could (should?) be hidden inside an audit function,
e.g. audit_log_net_containers() or something like that.
Perhaps... It was open-coded since at this point there are no other
users. That'll make this tidier though.
If the code was all contained within a single subsystem then I would
generally agree that open coding is preferable, but since we are
crossing a subsystem boundary I think it would be preferable to
abstract away the details into a separate function.

This will probably also be necessary once you change to using the
audit_net/net_generic mechanism.
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-03-29 09:01:32 UTC
Permalink
Raw Message
On Fri, 16 Mar 2018 05:00:28 -0400
Post by Richard Guy Briggs
Implement the proc fs write to set the audit container ID of a process,
emitting an AUDIT_CONTAINER record to document the event.
A little detail, but still...
I am understanding that you would prefer more context (as opposed to
operational detail) in the description, laying out the use case for this
patch(set)?
Post by Richard Guy Briggs
+static int audit_set_containerid_perm(struct task_struct *task, u64 containerid)
+{
+ struct task_struct *parent;
+ u64 pcontainerid, ccontainerid;
+
+ /* Don't allow to set our own containerid */
+ if (current == task)
+ return -EPERM;
+ /* Don't allow the containerid to be unset */
+ if (!cid_valid(containerid))
+ return -EINVAL;
I went looking for cid_valid(), but it turns out you don't add it until
patch 5. That, I expect, will not be good for bisectability (or patch
review).
Nice catch, thanks Jon. That is very likely another victim of a git
rebase to re-order afterthoughts in the right place. I'll need to be
more careful of that class of bug, rethink my workflow, or script builds
to verify each commit is compilable.
Thanks,
jon
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Jonathan Corbet
2018-03-29 13:03:27 UTC
Permalink
Raw Message
On Thu, 29 Mar 2018 05:01:32 -0400
Post by Richard Guy Briggs
A little detail, but still...
I am understanding that you would prefer more context (as opposed to
operational detail) in the description, laying out the use case for this
patch(set)?
No, sorry, "a little detail" was referring to my comment. The use case,
I believe, has been well described.

Thanks,

jon
Richard Guy Briggs
2018-03-30 05:06:39 UTC
Permalink
Raw Message
Post by Jonathan Corbet
On Thu, 29 Mar 2018 05:01:32 -0400
Post by Richard Guy Briggs
A little detail, but still...
I am understanding that you would prefer more context (as opposed to
operational detail) in the description, laying out the use case for this
patch(set)?
No, sorry, "a little detail" was referring to my comment. The use case,
I believe, has been well described.
Ah! "A minor nit". :-)
Post by Jonathan Corbet
jon
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Paul Moore
2018-04-18 23:47:44 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Implement the proc fs write to set the audit container ID of a process,
emitting an AUDIT_CONTAINER record to document the event.
This is a write from the container orchestrator task to a proc entry of
the form /proc/PID/containerid where PID is the process ID of the newly
created task that is to become the first task in a container, or an
additional task added to a container.
The write expects up to a u64 value (unset: 18446744073709551615).
type=CONTAINER msg=audit(1519903238.968:261): op=set pid=596 uid=0 subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 auid=0 tty=pts0 ses=1 opid=596 old-contid=18446744073709551615 contid=123455 res=0
The "op" field indicates an initial set. The "pid" to "ses" fields are
the orchestrator while the "opid" field is the object's PID, the process
being "contained". Old and new container ID values are given in the
"contid" fields, while res indicates its success.
It is not permitted to self-set, unset or re-set the container ID. A
child inherits its parent's container ID, but then can be set only once
after.
See: https://github.com/linux-audit/audit-kernel/issues/32
---
fs/proc/base.c | 37 ++++++++++++++++++++
include/linux/audit.h | 16 +++++++++
include/linux/init_task.h | 4 ++-
include/linux/sched.h | 1 +
include/uapi/linux/audit.h | 2 ++
kernel/auditsc.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 143 insertions(+), 1 deletion(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 60316b5..6ce4fbe 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1299,6 +1299,41 @@ static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
.read = proc_sessionid_read,
.llseek = generic_file_llseek,
};
+
+static ssize_t proc_containerid_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode *inode = file_inode(file);
+ u64 containerid;
+ int rv;
+ struct task_struct *task = get_proc_task(inode);
+
+ if (!task)
+ return -ESRCH;
+ if (*ppos != 0) {
+ /* No partial writes. */
+ put_task_struct(task);
+ return -EINVAL;
+ }
+
+ rv = kstrtou64_from_user(buf, count, 10, &containerid);
+ if (rv < 0) {
+ put_task_struct(task);
+ return rv;
+ }
+
+ rv = audit_set_containerid(task, containerid);
+ put_task_struct(task);
+ if (rv < 0)
+ return rv;
+ return count;
+}
+
+static const struct file_operations proc_containerid_operations = {
+ .write = proc_containerid_write,
+ .llseek = generic_file_llseek,
+};
+
#endif
#ifdef CONFIG_FAULT_INJECTION
@@ -2961,6 +2996,7 @@ static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns,
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
@@ -3355,6 +3391,7 @@ static int proc_tid_comm_permission(struct inode *inode, int mask)
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
diff --git a/include/linux/audit.h b/include/linux/audit.h
index af410d9..fe4ba3f 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -29,6 +29,7 @@
#define AUDIT_INO_UNSET ((unsigned long)-1)
#define AUDIT_DEV_UNSET ((dev_t)-1)
+#define INVALID_CID AUDIT_CID_UNSET
Why can't we just use AUDIT_CID_UNSET? Is there an important
distinction? If so, then shouldn't they have different values?

If we do need to keep INVALID_CID, let's rename it to
AUDIT_CID_INVALID so we have some consistency to the naming patterns
and we stress that it is an *audit* container ID.
Post by Richard Guy Briggs
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d258826..1b82191 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -796,6 +796,7 @@ struct task_struct {
#ifdef CONFIG_AUDITSYSCALL
kuid_t loginuid;
unsigned int sessionid;
+ u64 containerid;
This one line addition to the task_struct scares me the most of
anything in this patchset. Why? It's a field named "containerid" in
perhaps one of the most widely used core kernel structures; the
possibilities for abuse are endless, and it's foolish to think we
would ever be able to adequately police this.

Unfortunately, we can't add the field to audit_context as things
currently stand because we don't always allocate an audit_context,
it's dependent on the system's configuration, and we need to track the
audit container ID for a given process, regardless of the audit
configuration. Pretty much the same reason why loginuid and sessionid
are located directly in task_struct now. As I stressed during the
design phase, I really want to keep this as an *audit* container ID
and not a general purpose kernel wide container ID. If the kernel
ever grows a general purpose container ID token, I'll be the first in
line to convert the audit code, but I don't want audit to be that
general purpose mechanism ... audit is hated enough as-is ;)

I think the right solution to this is to create another new struct,
audit_task_info (or similar, the name really isn't that important),
which would be stored as a pointer in task_struct and would replace
the audit_context pointer, loginuid, sessionid, and the newly proposed
containerid. The new audit_task_info would always be allocated in the
audit_alloc() function (please use kmem_cache), and the audit_context
pointer included inside would continue to be allocated based on the
existing conditions. By keeping audit_task_info as a pointer inside
task_struct we could hide the structure definition inside
kernel/audit*.c and make it much more difficult for other subsystems
to abuse it.[1]

struct audit_task_info {
kuid_t loginuid;
unsigned int sessionid;
u64 containerid;
struct audit_context *ctx;
}

Actually, we might even want to consider storing audit_context in
audit_task_info (no pointer), or making it a zero length array
(ctx[0]) and going with a variable sized allocation of audit_task_info
... but all that could be done as a follow up optimization once we get
the basic idea sorted.

[1] If for some reason allocating audit_task_info becomes too much
overhead to bear (somewhat doubtful since we would only do it at task
creation), we could do some ugly tricks to directly include an
audit_task_struct chunk in task_struct but I'd like to avoid that if
possible (and I think we can).
Post by Richard Guy Briggs
#endif
struct seccomp seccomp;
...
Post by Richard Guy Briggs
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 4e61a9e..921a71f 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -71,6 +71,7 @@
#define AUDIT_TTY_SET 1017 /* Set TTY auditing status */
#define AUDIT_SET_FEATURE 1018 /* Turn an audit feature on or off */
#define AUDIT_GET_FEATURE 1019 /* Get which features are enabled */
+#define AUDIT_CONTAINER 1020 /* Define the container id and information */
#define AUDIT_FIRST_USER_MSG 1100 /* Userspace messages mostly uninteresting to kernel */
#define AUDIT_USER_AVC 1107 /* We filter this differently */
@@ -465,6 +466,7 @@ struct audit_tty_status {
};
#define AUDIT_UID_UNSET (unsigned int)-1
+#define AUDIT_CID_UNSET ((u64)-1)
I think we need to decide if we want to distinguish between the "host"
(e.g. init ns) and "unset". Looking at this patch (I've only quickly
skimmed the others so far) it would appear that you don't think we
need to worry about this distinction; that's fine, but let's make it
explicit with a comment in the code that AUDIT_CID_UNSET means "unset"
as well as "host".

If we do need to make a distinction, let's add a constant/macro for "host".
Post by Richard Guy Briggs
/* audit_rule_data supports filter rules with both integer and string
* fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4e0a4ac..29c8482 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2073,6 +2073,90 @@ int audit_set_loginuid(kuid_t loginuid)
return rc;
}
+static int audit_set_containerid_perm(struct task_struct *task, u64 containerid)
+{
+ struct task_struct *parent;
+ u64 pcontainerid, ccontainerid;
+
+ /* Don't allow to set our own containerid */
+ if (current == task)
+ return -EPERM;
Why not? Is there some obvious security concern that I am missing?

I ask because I suppose it might be possible for some container
runtime to do a fork, set up some of the environment and then exec the
container (before you answer the obvious "namespaces!" please remember
we're not trying to define containers).
Post by Richard Guy Briggs
+ /* Don't allow the containerid to be unset */
+ if (!cid_valid(containerid))
+ return -EINVAL;
+ /* if we don't have caps, reject */
+ if (!capable(CAP_AUDIT_CONTROL))
+ return -EPERM;
+ /* if containerid is unset, allow */
+ if (!audit_containerid_set(task))
+ return 0;
+ /* it is already set, and not inherited from the parent, reject */
+ ccontainerid = audit_get_containerid(task);
+ rcu_read_lock();
+ parent = rcu_dereference(task->real_parent);
+ rcu_read_unlock();
+ task_lock(parent);
+ pcontainerid = audit_get_containerid(parent);
+ task_unlock(parent);
+ if (ccontainerid != pcontainerid)
+ return -EPERM;
+ return 0;
+}
+
+static void audit_log_set_containerid(struct task_struct *task, u64 oldcontainerid,
+ u64 containerid, int rc)
+{
+ struct audit_buffer *ab;
+ uid_t uid;
+ struct tty_struct *tty;
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONTAINER);
+ if (!ab)
+ return;
+
+ uid = from_kuid(&init_user_ns, task_uid(current));
+ tty = audit_get_tty(current);
+
+ audit_log_format(ab, "op=set pid=%d uid=%u", task_tgid_nr(current), uid);
+ audit_log_task_context(ab);
+ audit_log_format(ab, " auid=%u tty=%s ses=%u opid=%d old-contid=%llu contid=%llu res=%d",
+ from_kuid(&init_user_ns, audit_get_loginuid(current)),
+ tty ? tty_name(tty) : "(none)", audit_get_sessionid(current),
+ task_tgid_nr(task), oldcontainerid, containerid, !rc);
+
+ audit_put_tty(tty);
+ audit_log_end(ab);
+}
+
+/**
+ * audit_set_containerid - set current task's audit_context containerid
+ *
+ * Returns 0 on success, -EPERM on permission failure.
+ *
+ * Called (set) from fs/proc/base.c::proc_containerid_write().
+ */
+int audit_set_containerid(struct task_struct *task, u64 containerid)
+{
+ u64 oldcontainerid;
+ int rc;
+
+ oldcontainerid = audit_get_containerid(task);
+
+ rc = audit_set_containerid_perm(task, containerid);
+ if (!rc) {
+ task_lock(task);
+ task->containerid = containerid;
+ task_unlock(task);
+ }
+
+ audit_log_set_containerid(task, oldcontainerid, containerid, rc);
+ return rc;
Why are audit_set_containerid_perm() and audit_log_set_containerid()
separate functions?
--
paul moore
www.paul-moore.com
Casey Schaufler
2018-04-19 00:41:29 UTC
Permalink
Raw Message
Post by Paul Moore
Post by Richard Guy Briggs
Implement the proc fs write to set the audit container ID of a process,
emitting an AUDIT_CONTAINER record to document the event.
...
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d258826..1b82191 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -796,6 +796,7 @@ struct task_struct {
#ifdef CONFIG_AUDITSYSCALL
kuid_t loginuid;
unsigned int sessionid;
+ u64 containerid;
This one line addition to the task_struct scares me the most of
anything in this patchset. Why? It's a field named "containerid" in
perhaps one of the most widely used core kernel structures; the
possibilities for abuse are endless, and it's foolish to think we
would ever be able to adequately police this.
If we can get the LSM infrastructure managed task blobs from
module stacking in ahead of this we could create a trivial security
module to manage this. It's not as if there aren't all sorts of
interactions between security modules and the audit system already.
Paul Moore
2018-04-19 00:46:45 UTC
Permalink
Raw Message
Post by Casey Schaufler
Post by Paul Moore
Post by Richard Guy Briggs
Implement the proc fs write to set the audit container ID of a process,
emitting an AUDIT_CONTAINER record to document the event.
...
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d258826..1b82191 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -796,6 +796,7 @@ struct task_struct {
#ifdef CONFIG_AUDITSYSCALL
kuid_t loginuid;
unsigned int sessionid;
+ u64 containerid;
This one line addition to the task_struct scares me the most of
anything in this patchset. Why? It's a field named "containerid" in
perhaps one of the most widely used core kernel structures; the
possibilities for abuse are endless, and it's foolish to think we
would ever be able to adequately police this.
If we can get the LSM infrastructure managed task blobs from
module stacking in ahead of this we could create a trivial security
module to manage this. It's not as if there aren't all sorts of
interactions between security modules and the audit system already.
While yes, there are plenty of interactions between the two, it is
possible to use audit without the LSMs and I would like to preserve
that. Further, I don't want to entangle two very complicated code
changes or make the audit container ID effort dependent on LSM
stacking.

You're a good salesman Casey, but you're not that good ;)
--
paul moore
www.paul-moore.com
Casey Schaufler
2018-04-19 01:15:46 UTC
Permalink
Raw Message
Post by Paul Moore
Post by Casey Schaufler
Post by Paul Moore
Post by Richard Guy Briggs
Implement the proc fs write to set the audit container ID of a process,
emitting an AUDIT_CONTAINER record to document the event.
...
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d258826..1b82191 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -796,6 +796,7 @@ struct task_struct {
#ifdef CONFIG_AUDITSYSCALL
kuid_t loginuid;
unsigned int sessionid;
+ u64 containerid;
This one line addition to the task_struct scares me the most of
anything in this patchset. Why? It's a field named "containerid" in
perhaps one of the most widely used core kernel structures; the
possibilities for abuse are endless, and it's foolish to think we
would ever be able to adequately police this.
If we can get the LSM infrastructure managed task blobs from
module stacking in ahead of this we could create a trivial security
module to manage this. It's not as if there aren't all sorts of
interactions between security modules and the audit system already.
While yes, there are plenty of interactions between the two, it is
possible to use audit without the LSMs and I would like to preserve
that.
Fair enough.
Post by Paul Moore
Further, I don't want to entangle two very complicated code
changes or make the audit container ID effort dependent on LSM
stacking.
Also fair, although the use case for container audit IDs is
already pulling in audit, namespaces (yeah, I know it's not
necessary for a container to use namespaces) security modules
(stacked and/or namespaced), cgroups and who knows what else.
Post by Paul Moore
You're a good salesman Casey, but you're not that good ;)
I have to keep the skills sharpened somehow!

OK, I'll grant that this isn't a great fit.
Richard Guy Briggs
2018-04-21 14:34:43 UTC
Permalink
Raw Message
Post by Paul Moore
Post by Richard Guy Briggs
Implement the proc fs write to set the audit container ID of a process,
emitting an AUDIT_CONTAINER record to document the event.
This is a write from the container orchestrator task to a proc entry of
the form /proc/PID/containerid where PID is the process ID of the newly
created task that is to become the first task in a container, or an
additional task added to a container.
The write expects up to a u64 value (unset: 18446744073709551615).
type=CONTAINER msg=audit(1519903238.968:261): op=set pid=596 uid=0 subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 auid=0 tty=pts0 ses=1 opid=596 old-contid=18446744073709551615 contid=123455 res=0
The "op" field indicates an initial set. The "pid" to "ses" fields are
the orchestrator while the "opid" field is the object's PID, the process
being "contained". Old and new container ID values are given in the
"contid" fields, while res indicates its success.
It is not permitted to self-set, unset or re-set the container ID. A
child inherits its parent's container ID, but then can be set only once
after.
See: https://github.com/linux-audit/audit-kernel/issues/32
---
fs/proc/base.c | 37 ++++++++++++++++++++
include/linux/audit.h | 16 +++++++++
include/linux/init_task.h | 4 ++-
include/linux/sched.h | 1 +
include/uapi/linux/audit.h | 2 ++
kernel/auditsc.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 143 insertions(+), 1 deletion(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 60316b5..6ce4fbe 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1299,6 +1299,41 @@ static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
.read = proc_sessionid_read,
.llseek = generic_file_llseek,
};
+
+static ssize_t proc_containerid_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode *inode = file_inode(file);
+ u64 containerid;
+ int rv;
+ struct task_struct *task = get_proc_task(inode);
+
+ if (!task)
+ return -ESRCH;
+ if (*ppos != 0) {
+ /* No partial writes. */
+ put_task_struct(task);
+ return -EINVAL;
+ }
+
+ rv = kstrtou64_from_user(buf, count, 10, &containerid);
+ if (rv < 0) {
+ put_task_struct(task);
+ return rv;
+ }
+
+ rv = audit_set_containerid(task, containerid);
+ put_task_struct(task);
+ if (rv < 0)
+ return rv;
+ return count;
+}
+
+static const struct file_operations proc_containerid_operations = {
+ .write = proc_containerid_write,
+ .llseek = generic_file_llseek,
+};
+
#endif
#ifdef CONFIG_FAULT_INJECTION
@@ -2961,6 +2996,7 @@ static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns,
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
@@ -3355,6 +3391,7 @@ static int proc_tid_comm_permission(struct inode *inode, int mask)
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
diff --git a/include/linux/audit.h b/include/linux/audit.h
index af410d9..fe4ba3f 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -29,6 +29,7 @@
#define AUDIT_INO_UNSET ((unsigned long)-1)
#define AUDIT_DEV_UNSET ((dev_t)-1)
+#define INVALID_CID AUDIT_CID_UNSET
Why can't we just use AUDIT_CID_UNSET? Is there an important
distinction? If so, then shouldn't they have different values?
One was intended as user-facing and the other was intended for kernel
internal. As you point out, this does not appear to be necessary since
they are both the same type. This was to mirror loginuid due to UID
namespace practice to separate the two to make things very clear that a
userspace view of a UID needed to be translated from the user's user
namespace to the kernel's absolute view of UIDs from the init user
namespace. Since container ID meanings do not depend on any namespace
context, I agree we can use just one and I'd go with AUDIT_CID_UNSET.
Post by Paul Moore
If we do need to keep INVALID_CID, let's rename it to
AUDIT_CID_INVALID so we have some consistency to the naming patterns
and we stress that it is an *audit* container ID.
Post by Richard Guy Briggs
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d258826..1b82191 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -796,6 +796,7 @@ struct task_struct {
#ifdef CONFIG_AUDITSYSCALL
kuid_t loginuid;
unsigned int sessionid;
+ u64 containerid;
This one line addition to the task_struct scares me the most of
anything in this patchset. Why? It's a field named "containerid" in
perhaps one of the most widely used core kernel structures; the
possibilities for abuse are endless, and it's foolish to think we
would ever be able to adequately police this.
Fair enough.
Post by Paul Moore
Unfortunately, we can't add the field to audit_context as things
currently stand because we don't always allocate an audit_context,
it's dependent on the system's configuration, and we need to track the
audit container ID for a given process, regardless of the audit
configuration. Pretty much the same reason why loginuid and sessionid
are located directly in task_struct now. As I stressed during the
design phase, I really want to keep this as an *audit* container ID
and not a general purpose kernel wide container ID. If the kernel
ever grows a general purpose container ID token, I'll be the first in
line to convert the audit code, but I don't want audit to be that
general purpose mechanism ... audit is hated enough as-is ;)
When would we need an audit container ID when audit is not enabled
enough to have an audit_context?

If it is only used for audit, and audit is the only consumer, and audit
can only use it when it is enabled, then we can just return success to
any write to the proc filehandle, or not even present it. Nothing will
be able to know that value wasn't used.

When are loginuid and sessionid used now when audit is not enabled (or
should I say, explicitly disabled)?
Post by Paul Moore
I think the right solution to this is to create another new struct,
audit_task_info (or similar, the name really isn't that important),
which would be stored as a pointer in task_struct and would replace
the audit_context pointer, loginuid, sessionid, and the newly proposed
containerid. The new audit_task_info would always be allocated in the
audit_alloc() function (please use kmem_cache), and the audit_context
pointer included inside would continue to be allocated based on the
existing conditions. By keeping audit_task_info as a pointer inside
task_struct we could hide the structure definition inside
kernel/audit*.c and make it much more difficult for other subsystems
to abuse it.[1]
struct audit_task_info {
kuid_t loginuid;
unsigned int sessionid;
u64 containerid;
struct audit_context *ctx;
}
I agree this looks like a good change.
Post by Paul Moore
Actually, we might even want to consider storing audit_context in
audit_task_info (no pointer), or making it a zero length array
(ctx[0]) and going with a variable sized allocation of audit_task_info
... but all that could be done as a follow up optimization once we get
the basic idea sorted.
[1] If for some reason allocating audit_task_info becomes too much
overhead to bear (somewhat doubtful since we would only do it at task
creation), we could do some ugly tricks to directly include an
audit_task_struct chunk in task_struct but I'd like to avoid that if
possible (and I think we can).
Post by Richard Guy Briggs
#endif
struct seccomp seccomp;
...
Post by Richard Guy Briggs
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 4e61a9e..921a71f 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -71,6 +71,7 @@
#define AUDIT_TTY_SET 1017 /* Set TTY auditing status */
#define AUDIT_SET_FEATURE 1018 /* Turn an audit feature on or off */
#define AUDIT_GET_FEATURE 1019 /* Get which features are enabled */
+#define AUDIT_CONTAINER 1020 /* Define the container id and information */
#define AUDIT_FIRST_USER_MSG 1100 /* Userspace messages mostly uninteresting to kernel */
#define AUDIT_USER_AVC 1107 /* We filter this differently */
@@ -465,6 +466,7 @@ struct audit_tty_status {
};
#define AUDIT_UID_UNSET (unsigned int)-1
+#define AUDIT_CID_UNSET ((u64)-1)
I think we need to decide if we want to distinguish between the "host"
(e.g. init ns) and "unset". Looking at this patch (I've only quickly
skimmed the others so far) it would appear that you don't think we
need to worry about this distinction; that's fine, but let's make it
explicit with a comment in the code that AUDIT_CID_UNSET means "unset"
as well as "host".
I don't see any reason to distinguish between "host" and "unset". Since
a container doesn't have a concrete definition based in namespaces, the
initial namespace set is meaningless here.

Is there value in having a container orchestrator process have a
reserved container ID that has a policy distinct from any other
container? If so, then I could see the value in making the distinction.
For example, I've heard of interest in systemd acting as a container
orchestrator, so if it took on that role as PID 1, then every process in
the system would inherit that ID and none would be unset.

I can't picture how having separate "host" and "unset" values helps us.
Post by Paul Moore
If we do need to make a distinction, let's add a constant/macro for "host".
Currently "unset" is -1 which fits the convention used for sessionid and
loginuid and a number of others, so I think it makes sense to stick with
that. If we decide we need a "host" flag, would it make sense to use 0
or (u64)-2?
Post by Paul Moore
Post by Richard Guy Briggs
/* audit_rule_data supports filter rules with both integer and string
* fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4e0a4ac..29c8482 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2073,6 +2073,90 @@ int audit_set_loginuid(kuid_t loginuid)
return rc;
}
+static int audit_set_containerid_perm(struct task_struct *task, u64 containerid)
+{
+ struct task_struct *parent;
+ u64 pcontainerid, ccontainerid;
+
+ /* Don't allow to set our own containerid */
+ if (current == task)
+ return -EPERM;
Why not? Is there some obvious security concern that I am missing?
We then lose the distinction in the AUDIT_CONTAINER record between the
initiating PID and the target PID. This was outlined in the proposal.

Having said that, I'm still not sure we have protected sufficiently from
a child turning around and setting its parent's as yet unset or
inherited audit container ID.
Post by Paul Moore
I ask because I suppose it might be possible for some container
runtime to do a fork, set up some of the environment and then exec the
container (before you answer the obvious "namespaces!" please remember
we're not trying to define containers).
I don't think namespaces have any bearing on this concern since none are
required.
Post by Paul Moore
Post by Richard Guy Briggs
+ /* Don't allow the containerid to be unset */
+ if (!cid_valid(containerid))
+ return -EINVAL;
+ /* if we don't have caps, reject */
+ if (!capable(CAP_AUDIT_CONTROL))
+ return -EPERM;
+ /* if containerid is unset, allow */
+ if (!audit_containerid_set(task))
+ return 0;
+ /* it is already set, and not inherited from the parent, reject */
+ ccontainerid = audit_get_containerid(task);
+ rcu_read_lock();
+ parent = rcu_dereference(task->real_parent);
+ rcu_read_unlock();
+ task_lock(parent);
+ pcontainerid = audit_get_containerid(parent);
+ task_unlock(parent);
+ if (ccontainerid != pcontainerid)
+ return -EPERM;
+ return 0;
+}
+
+static void audit_log_set_containerid(struct task_struct *task, u64 oldcontainerid,
+ u64 containerid, int rc)
+{
+ struct audit_buffer *ab;
+ uid_t uid;
+ struct tty_struct *tty;
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONTAINER);
+ if (!ab)
+ return;
+
+ uid = from_kuid(&init_user_ns, task_uid(current));
+ tty = audit_get_tty(current);
+
+ audit_log_format(ab, "op=set pid=%d uid=%u", task_tgid_nr(current), uid);
+ audit_log_task_context(ab);
+ audit_log_format(ab, " auid=%u tty=%s ses=%u opid=%d old-contid=%llu contid=%llu res=%d",
+ from_kuid(&init_user_ns, audit_get_loginuid(current)),
+ tty ? tty_name(tty) : "(none)", audit_get_sessionid(current),
+ task_tgid_nr(task), oldcontainerid, containerid, !rc);
+
+ audit_put_tty(tty);
+ audit_log_end(ab);
+}
+
+/**
+ * audit_set_containerid - set current task's audit_context containerid
+ *
+ * Returns 0 on success, -EPERM on permission failure.
+ *
+ * Called (set) from fs/proc/base.c::proc_containerid_write().
+ */
+int audit_set_containerid(struct task_struct *task, u64 containerid)
+{
+ u64 oldcontainerid;
+ int rc;
+
+ oldcontainerid = audit_get_containerid(task);
+
+ rc = audit_set_containerid_perm(task, containerid);
+ if (!rc) {
+ task_lock(task);
+ task->containerid = containerid;
+ task_unlock(task);
+ }
+
+ audit_log_set_containerid(task, oldcontainerid, containerid, rc);
+ return rc;
Why are audit_set_containerid_perm() and audit_log_containerid()
separate functions?
(I assume you mean audit_log_set_containerid()?)
It seemed clearer that all the permission checking was in one function
and its return code could be used to report the outcome when logging the
(attempted) action. This is the same structure as audit_set_loginuid()
and it made sense.

This would be the time to connect it to a syscall if that seems like a
good idea and remove pid, uid, auid, tty, ses fields.
Post by Paul Moore
paul moore
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Paul Moore
2018-04-23 23:15:08 UTC
Permalink
Raw Message
Post by Casey Schaufler
Post by Paul Moore
Post by Richard Guy Briggs
Implement the proc fs write to set the audit container ID of a process,
emitting an AUDIT_CONTAINER record to document the event.
This is a write from the container orchestrator task to a proc entry of
the form /proc/PID/containerid where PID is the process ID of the newly
created task that is to become the first task in a container, or an
additional task added to a container.
The write expects up to a u64 value (unset: 18446744073709551615).
type=CONTAINER msg=audit(1519903238.968:261): op=set pid=596 uid=0 subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 auid=0 tty=pts0 ses=1 opid=596 old-contid=18446744073709551615 contid=123455 res=0
The "op" field indicates an initial set. The "pid" to "ses" fields are
the orchestrator while the "opid" field is the object's PID, the process
being "contained". Old and new container ID values are given in the
"contid" fields, while res indicates its success.
It is not permitted to self-set, unset or re-set the container ID. A
child inherits its parent's container ID, but then can be set only once
after.
See: https://github.com/linux-audit/audit-kernel/issues/32
---
fs/proc/base.c | 37 ++++++++++++++++++++
include/linux/audit.h | 16 +++++++++
include/linux/init_task.h | 4 ++-
include/linux/sched.h | 1 +
include/uapi/linux/audit.h | 2 ++
kernel/auditsc.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 143 insertions(+), 1 deletion(-)
...
Post by Casey Schaufler
Post by Paul Moore
Post by Richard Guy Briggs
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d258826..1b82191 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -796,6 +796,7 @@ struct task_struct {
#ifdef CONFIG_AUDITSYSCALL
kuid_t loginuid;
unsigned int sessionid;
+ u64 containerid;
This one line addition to the task_struct scares me the most of
anything in this patchset. Why? It's a field named "containerid" in
perhaps one of the most widely used core kernel structures; the
possibilities for abuse are endless, and it's foolish to think we
would ever be able to adequately police this.
Fair enough.
Post by Paul Moore
Unfortunately, we can't add the field to audit_context as things
currently stand because we don't always allocate an audit_context,
it's dependent on the system's configuration, and we need to track the
audit container ID for a given process, regardless of the audit
configuration. Pretty much the same reason why loginuid and sessionid
are located directly in task_struct now. As I stressed during the
design phase, I really want to keep this as an *audit* container ID
and not a general purpose kernel wide container ID. If the kernel
ever grows a general purpose container ID token, I'll be the first in
line to convert the audit code, but I don't want audit to be that
general purpose mechanism ... audit is hated enough as-is ;)
When would we need an audit container ID when audit is not enabled
enough to have an audit_context?
I'm thinking of the audit_alloc() case where audit_filter_task()
returns AUDIT_DISABLED.

I believe this is the same reason why loginuid and sessionid live
directly in the task_struct and not in the audit_context; they need to
persist for the lifetime of the task.
Post by Casey Schaufler
If it is only used for audit, and audit is the only consumer, and audit
can only use it when it is enabled, then we can just return success to
any write to the proc filehandle, or not even present it. Nothing will
be able to know that value wasn't used.
When are loginuid and sessionid used now when audit is not enabled (or
should I say, explicitly disabled)?
See above. I think that should answer these questions.
Post by Casey Schaufler
Post by Paul Moore
Post by Richard Guy Briggs
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 4e61a9e..921a71f 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -71,6 +71,7 @@
#define AUDIT_TTY_SET 1017 /* Set TTY auditing status */
#define AUDIT_SET_FEATURE 1018 /* Turn an audit feature on or off */
#define AUDIT_GET_FEATURE 1019 /* Get which features are enabled */
+#define AUDIT_CONTAINER 1020 /* Define the container id and information */
#define AUDIT_FIRST_USER_MSG 1100 /* Userspace messages mostly uninteresting to kernel */
#define AUDIT_USER_AVC 1107 /* We filter this differently */
@@ -465,6 +466,7 @@ struct audit_tty_status {
};
#define AUDIT_UID_UNSET (unsigned int)-1
+#define AUDIT_CID_UNSET ((u64)-1)
I think we need to decide if we want to distinguish between the "host"
(e.g. init ns) and "unset". Looking at this patch (I've only quickly
skimmed the others so far) it would appear that you don't think we
need to worry about this distinction; that's fine, but let's make it
explicit with a comment in the code that AUDIT_CID_UNSET means "unset"
as well as "host".
I don't see any reason to distinguish between "host" and "unset". Since
a container doesn't have a concrete definition based in namespaces, the
initial namespace set is meaningless here.
Okay, that sounds reasonable.
Post by Casey Schaufler
Is there value in having a container orchestrator process have a
reserved container ID that has a policy distinct from any other
container?
I'm open to arguments for this idea, but I don't see a point to it right now.
Post by Casey Schaufler
If so, then I could see the value in making the distinction.
For example, I've heard of interest in systemd acting as a container
orchestrator, so if it took on that role as PID 1, then every process in
the system would inherit that ID and none would be unset.
I can't picture how having separate "host" and "unset" values helps us.
I don't have a strong feeling either way, I just wanted to ask the question.
Post by Casey Schaufler
Post by Paul Moore
Post by Richard Guy Briggs
/* audit_rule_data supports filter rules with both integer and string
* fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4e0a4ac..29c8482 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2073,6 +2073,90 @@ int audit_set_loginuid(kuid_t loginuid)
return rc;
}
+static int audit_set_containerid_perm(struct task_struct *task, u64 containerid)
+{
+ struct task_struct *parent;
+ u64 pcontainerid, ccontainerid;
+
+ /* Don't allow to set our own containerid */
+ if (current == task)
+ return -EPERM;
Why not? Is there some obvious security concern that I am missing?
We then lose the distinction in the AUDIT_CONTAINER record between the
initiating PID and the target PID. This was outlined in the proposal.
I just went back and reread the v3 proposal and I still don't see a
good explanation of this. Why is this bad? What's the security
concern?
Post by Casey Schaufler
Having said that, I'm still not sure we have protected sufficiently from
a child turning around and setting its parent's as yet unset or
inherited audit container ID.
Yes, I believe we only want to let a task set the audit container for
its children (or itself/threads if we decide to allow that, see
above). There *has* to be a function to check to see if a task is a
child of a given task ... right? ... although this is likely to be a
pointer traversal and locking nightmare ... hmmm.
Post by Casey Schaufler
Post by Paul Moore
I ask because I suppose it might be possible for some container
runtime to do a fork, set up some of the environment and then exec the
container (before you answer the obvious "namespaces!" please remember
we're not trying to define containers).
I don't think namespaces have any bearing on this concern since none are
required.
Post by Paul Moore
Post by Richard Guy Briggs
+ /* Don't allow the containerid to be unset */
+ if (!cid_valid(containerid))
+ return -EINVAL;
+ /* if we don't have caps, reject */
+ if (!capable(CAP_AUDIT_CONTROL))
+ return -EPERM;
+ /* if containerid is unset, allow */
+ if (!audit_containerid_set(task))
+ return 0;
+ /* it is already set, and not inherited from the parent, reject */
+ ccontainerid = audit_get_containerid(task);
+ rcu_read_lock();
+ parent = rcu_dereference(task->real_parent);
+ rcu_read_unlock();
+ task_lock(parent);
+ pcontainerid = audit_get_containerid(parent);
+ task_unlock(parent);
+ if (ccontainerid != pcontainerid)
+ return -EPERM;
+ return 0;
+}
+
+static void audit_log_set_containerid(struct task_struct *task, u64 oldcontainerid,
+ u64 containerid, int rc)
+{
+ struct audit_buffer *ab;
+ uid_t uid;
+ struct tty_struct *tty;
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONTAINER);
+ if (!ab)
+ return;
+
+ uid = from_kuid(&init_user_ns, task_uid(current));
+ tty = audit_get_tty(current);
+
+ audit_log_format(ab, "op=set pid=%d uid=%u", task_tgid_nr(current), uid);
+ audit_log_task_context(ab);
+ audit_log_format(ab, " auid=%u tty=%s ses=%u opid=%d old-contid=%llu contid=%llu res=%d",
+ from_kuid(&init_user_ns, audit_get_loginuid(current)),
+ tty ? tty_name(tty) : "(none)", audit_get_sessionid(current),
+ task_tgid_nr(task), oldcontainerid, containerid, !rc);
+
+ audit_put_tty(tty);
+ audit_log_end(ab);
+}
+
+/**
+ * audit_set_containerid - set current task's audit_context containerid
+ *
+ * Returns 0 on success, -EPERM on permission failure.
+ *
+ * Called (set) from fs/proc/base.c::proc_containerid_write().
+ */
+int audit_set_containerid(struct task_struct *task, u64 containerid)
+{
+ u64 oldcontainerid;
+ int rc;
+
+ oldcontainerid = audit_get_containerid(task);
+
+ rc = audit_set_containerid_perm(task, containerid);
+ if (!rc) {
+ task_lock(task);
+ task->containerid = containerid;
+ task_unlock(task);
+ }
+
+ audit_log_set_containerid(task, oldcontainerid, containerid, rc);
+ return rc;
Why are audit_set_containerid_perm() and audit_log_containerid()
separate functions?
(I assume you mean audit_log_set_containerid()?)
Yep. My fingers got tired typing in that function name and decided a
shortcut was necessary.
Post by Casey Schaufler
It seemed clearer that all the permission checking was in one function
and its return code could be used to report the outcome when logging the
(attempted) action. This is the same structure as audit_set_loginuid()
and it made sense.
When possible I really like it when the permission checks are in the
same function as the code which does the work; it's less likely to get
abused that way (you have to willfully bypass the access checks). The
exceptions might be if you wanted to reuse the access control code, or
insert a modular access mechanism (e.g. LSMs).

I'm less concerned about audit_log_set_containerid(), but the usual
idea of avoiding single-use functions within the same scope applies
here.
Post by Casey Schaufler
This would be the time to connect it to a syscall if that seems like a
good idea and remove pid, uid, auid, tty, ses fields.
Ah yes, I missed that. You know my stance on connecting records by
now (hint: yes, connect them) so I think that would be a good thing to
do for the next round.
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-04-24 02:02:00 UTC
Permalink
Raw Message
Post by Paul Moore
Post by Casey Schaufler
Post by Paul Moore
Post by Richard Guy Briggs
Implement the proc fs write to set the audit container ID of a process,
emitting an AUDIT_CONTAINER record to document the event.
This is a write from the container orchestrator task to a proc entry of
the form /proc/PID/containerid where PID is the process ID of the newly
created task that is to become the first task in a container, or an
additional task added to a container.
The write expects up to a u64 value (unset: 18446744073709551615).
type=CONTAINER msg=audit(1519903238.968:261): op=set pid=596 uid=0 subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 auid=0 tty=pts0 ses=1 opid=596 old-contid=18446744073709551615 contid=123455 res=0
The "op" field indicates an initial set. The "pid" to "ses" fields are
the orchestrator while the "opid" field is the object's PID, the process
being "contained". Old and new container ID values are given in the
"contid" fields, while res indicates its success.
It is not permitted to self-set, unset or re-set the container ID. A
child inherits its parent's container ID, but then can be set only once
after.
See: https://github.com/linux-audit/audit-kernel/issues/32
---
fs/proc/base.c | 37 ++++++++++++++++++++
include/linux/audit.h | 16 +++++++++
include/linux/init_task.h | 4 ++-
include/linux/sched.h | 1 +
include/uapi/linux/audit.h | 2 ++
kernel/auditsc.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 143 insertions(+), 1 deletion(-)
...
Post by Casey Schaufler
Post by Paul Moore
Post by Richard Guy Briggs
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d258826..1b82191 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -796,6 +796,7 @@ struct task_struct {
#ifdef CONFIG_AUDITSYSCALL
kuid_t loginuid;
unsigned int sessionid;
+ u64 containerid;
This one line addition to the task_struct scares me the most of
anything in this patchset. Why? It's a field named "containerid" in
perhaps one of the most widely used core kernel structures; the
possibilities for abuse are endless, and it's foolish to think we
would ever be able to adequately police this.
Fair enough.
Post by Paul Moore
Unfortunately, we can't add the field to audit_context as things
currently stand because we don't always allocate an audit_context,
it's dependent on the system's configuration, and we need to track the
audit container ID for a given process, regardless of the audit
configuration. Pretty much the same reason why loginuid and sessionid
are located directly in task_struct now. As I stressed during the
design phase, I really want to keep this as an *audit* container ID
and not a general purpose kernel wide container ID. If the kernel
ever grows a general purpose container ID token, I'll be the first in
line to convert the audit code, but I don't want audit to be that
general purpose mechanism ... audit is hated enough as-is ;)
When would we need an audit container ID when audit is not enabled
enough to have an audit_context?
I'm thinking of the audit_alloc() case where audit_filter_task()
returns AUDIT_DISABLED.
Ok, so a task could be marked as filtered but its children would still
be auditable and inheriting its parent containerid (as well as its
loginuid and sessionid)...
Post by Paul Moore
I believe this is the same reason why loginuid and sessionid live
directly in the task_struct and not in the audit_context; they need to
persist for the lifetime of the task.
Yes, probably.
Post by Paul Moore
Post by Casey Schaufler
If it is only used for audit, and audit is the only consumer, and audit
can only use it when it is enabled, then we can just return success to
any write to the proc filehandle, or not even present it. Nothing will
be able to know that value wasn't used.
When are loginuid and sessionid used now when audit is not enabled (or
should I say, explicitly disabled)?
See above. I think that should answer these questions.
Ok.
Post by Paul Moore
Post by Casey Schaufler
Post by Paul Moore
Post by Richard Guy Briggs
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 4e61a9e..921a71f 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -71,6 +71,7 @@
#define AUDIT_TTY_SET 1017 /* Set TTY auditing status */
#define AUDIT_SET_FEATURE 1018 /* Turn an audit feature on or off */
#define AUDIT_GET_FEATURE 1019 /* Get which features are enabled */
+#define AUDIT_CONTAINER 1020 /* Define the container id and information */
#define AUDIT_FIRST_USER_MSG 1100 /* Userspace messages mostly uninteresting to kernel */
#define AUDIT_USER_AVC 1107 /* We filter this differently */
@@ -465,6 +466,7 @@ struct audit_tty_status {
};
#define AUDIT_UID_UNSET (unsigned int)-1
+#define AUDIT_CID_UNSET ((u64)-1)
I think we need to decide if we want to distinguish between the "host"
(e.g. init ns) and "unset". Looking at this patch (I've only quickly
skimmed the others so far) it would appear that you don't think we
need to worry about this distinction; that's fine, but let's make it
explicit with a comment in the code that AUDIT_CID_UNSET means "unset"
as well as "host".
I don't see any reason to distinguish between "host" and "unset". Since
a container doesn't have a concrete definition based in namespaces, the
initial namespace set is meaningless here.
Okay, that sounds reasonable.
Post by Casey Schaufler
Is there value in having a container orchestrator process have a
reserved container ID that has a policy distinct from any other
container?
I'm open to arguments for this idea, but I don't see a point to it right now.
Post by Casey Schaufler
If so, then I could see the value in making the distinction.
For example, I've heard of interest in systemd acting as a container
orchestrator, so if it took on that role as PID 1, then every process in
the system would inherit that ID and none would be unset.
I can't picture how having separate "host" and "unset" values helps us.
I don't have a strong feeling either way, I just wanted to ask the question.
Post by Casey Schaufler
Post by Paul Moore
Post by Richard Guy Briggs
/* audit_rule_data supports filter rules with both integer and string
* fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4e0a4ac..29c8482 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2073,6 +2073,90 @@ int audit_set_loginuid(kuid_t loginuid)
return rc;
}
+static int audit_set_containerid_perm(struct task_struct *task, u64 containerid)
+{
+ struct task_struct *parent;
+ u64 pcontainerid, ccontainerid;
+
+ /* Don't allow to set our own containerid */
+ if (current == task)
+ return -EPERM;
Why not? Is there some obvious security concern that I am missing?
We then lose the distinction in the AUDIT_CONTAINER record between the
initiating PID and the target PID. This was outlined in the proposal.
I just went back and reread the v3 proposal and I still don't see a
good explanation of this. Why is this bad? What's the security
concern?
I don't remember, specifically. Maybe this has been addressed by the
check for children/threads or identical parent container ID. So, I'm
reluctantly willing to remove that check for now.
Post by Paul Moore
Post by Casey Schaufler
Having said that, I'm still not sure we have protected sufficiently from
a child turning around and setting its parent's as yet unset or
inherited audit container ID.
Yes, I believe we only want to let a task set the audit container for
its children (or itself/threads if we decide to allow that, see
above). There *has* to be a function to check to see if a task is a
child of a given task ... right? ... although this is likely to be a
pointer traversal and locking nightmare ... hmmm.
Isn't that just (struct task_struct)parent == (struct
task_struct)child->parent (or ->real_parent)?

And now that I say that, it is covered by the following patch's child
check, so as long as we keep that, we should be fine.
Post by Paul Moore
Post by Casey Schaufler
Post by Paul Moore
I ask because I suppose it might be possible for some container
runtime to do a fork, set up some of the environment and then exec the
container (before you answer the obvious "namespaces!" please remember
we're not trying to define containers).
I don't think namespaces have any bearing on this concern since none are
required.
Post by Paul Moore
Post by Richard Guy Briggs
+ /* Don't allow the containerid to be unset */
+ if (!cid_valid(containerid))
+ return -EINVAL;
+ /* if we don't have caps, reject */
+ if (!capable(CAP_AUDIT_CONTROL))
+ return -EPERM;
+ /* if containerid is unset, allow */
+ if (!audit_containerid_set(task))
+ return 0;
+ /* it is already set, and not inherited from the parent, reject */
+ ccontainerid = audit_get_containerid(task);
+ rcu_read_lock();
+ parent = rcu_dereference(task->real_parent);
+ rcu_read_unlock();
+ task_lock(parent);
+ pcontainerid = audit_get_containerid(parent);
+ task_unlock(parent);
+ if (ccontainerid != pcontainerid)
+ return -EPERM;
+ return 0;
+}
+
+static void audit_log_set_containerid(struct task_struct *task, u64 oldcontainerid,
+ u64 containerid, int rc)
+{
+ struct audit_buffer *ab;
+ uid_t uid;
+ struct tty_struct *tty;
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONTAINER);
+ if (!ab)
+ return;
+
+ uid = from_kuid(&init_user_ns, task_uid(current));
+ tty = audit_get_tty(current);
+
+ audit_log_format(ab, "op=set pid=%d uid=%u", task_tgid_nr(current), uid);
+ audit_log_task_context(ab);
+ audit_log_format(ab, " auid=%u tty=%s ses=%u opid=%d old-contid=%llu contid=%llu res=%d",
+ from_kuid(&init_user_ns, audit_get_loginuid(current)),
+ tty ? tty_name(tty) : "(none)", audit_get_sessionid(current),
+ task_tgid_nr(task), oldcontainerid, containerid, !rc);
+
+ audit_put_tty(tty);
+ audit_log_end(ab);
+}
+
+/**
+ * audit_set_containerid - set current task's audit_context containerid
+ *
+ * Returns 0 on success, -EPERM on permission failure.
+ *
+ * Called (set) from fs/proc/base.c::proc_containerid_write().
+ */
+int audit_set_containerid(struct task_struct *task, u64 containerid)
+{
+ u64 oldcontainerid;
+ int rc;
+
+ oldcontainerid = audit_get_containerid(task);
+
+ rc = audit_set_containerid_perm(task, containerid);
+ if (!rc) {
+ task_lock(task);
+ task->containerid = containerid;
+ task_unlock(task);
+ }
+
+ audit_log_set_containerid(task, oldcontainerid, containerid, rc);
+ return rc;
Why are audit_set_containerid_perm() and audit_log_containerid()
separate functions?
(I assume you mean audit_log_set_containerid()?)
Yep. My fingers got tired typing in that function name and decided a
shortcut was necessary.
Post by Casey Schaufler
It seemed clearer that all the permission checking was in one function
and its return code could be used to report the outcome when logging the
(attempted) action. This is the same structure as audit_set_loginuid()
and it made sense.
When possible I really like it when the permission checks are in the
same function as the code which does the work; it's less likely to get
abused that way (you have to willfully bypass the access checks). The
exceptions might be if you wanted to reuse the access control code, or
insert a modular access mechanism (e.g. LSMs).
I don't follow how it could be abused. The return code from the perm
check gates setting the value and is used in the success field in the
log.
Post by Paul Moore
I'm less concerned about audit_log_set_containerid(), but the usual
idea of avoiding single-use function within the same scope applies
here.
Post by Casey Schaufler
This would be the time to connect it to a syscall if that seems like a
good idea and remove pid, uid, auid, tty, ses fields.
Ah yes, I missed that. You know my stance on connecting records by
now (hint: yes, connect them) so I think that would be a good thing to
do for the next round.
Ok...
Post by Paul Moore
paul moore
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Paul Moore
2018-04-24 19:01:17 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Implement the proc fs write to set the audit container ID of a process,
emitting an AUDIT_CONTAINER record to document the event.
This is a write from the container orchestrator task to a proc entry of
the form /proc/PID/containerid where PID is the process ID of the newly
created task that is to become the first task in a container, or an
additional task added to a container.
The write expects up to a u64 value (unset: 18446744073709551615).
type=CONTAINER msg=audit(1519903238.968:261): op=set pid=596 uid=0 subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 auid=0 tty=pts0 ses=1 opid=596 old-contid=18446744073709551615 contid=123455 res=0
The "op" field indicates an initial set. The "pid" to "ses" fields are
the orchestrator while the "opid" field is the object's PID, the process
being "contained". Old and new container ID values are given in the
"contid" fields, while res indicates its success.
It is not permitted to self-set, unset or re-set the container ID. A
child inherits its parent's container ID, but then can be set only once
after.
See: https://github.com/linux-audit/audit-kernel/issues/32
---
fs/proc/base.c | 37 ++++++++++++++++++++
include/linux/audit.h | 16 +++++++++
include/linux/init_task.h | 4 ++-
include/linux/sched.h | 1 +
include/uapi/linux/audit.h | 2 ++
kernel/auditsc.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 143 insertions(+), 1 deletion(-)
...
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
/* audit_rule_data supports filter rules with both integer and string
* fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4e0a4ac..29c8482 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2073,6 +2073,90 @@ int audit_set_loginuid(kuid_t loginuid)
return rc;
}
+static int audit_set_containerid_perm(struct task_struct *task, u64 containerid)
+{
+ struct task_struct *parent;
+ u64 pcontainerid, ccontainerid;
+
+ /* Don't allow to set our own containerid */
+ if (current == task)
+ return -EPERM;
Why not? Is there some obvious security concern that I missing?
We then lose the distinction in the AUDIT_CONTAINER record between the
initiating PID and the target PID. This was outlined in the proposal.
I just went back and reread the v3 proposal and I still don't see a
good explanation of this. Why is this bad? What's the security
concern?
I don't remember, specifically. Maybe this has been addressed by the
check for children/threads or identical parent container ID. So, I'm
reluctantly willing to remove that check for now.
Okay. For the record, if someone can explain to me why this
restriction saves us from some terrible situation I'm all for leaving
it. I'm just opposed to restrictions without solid reasoning behind
them.
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Having said that, I'm still not sure we have protected sufficiently from
a child turning around and setting it's parent's as yet unset or
inherited audit container ID.
Yes, I believe we only want to let a task set the audit container for
it's children (or itself/threads if we decide to allow that, see
above). There *has* to be a function to check to see if a task if a
child of a given task ... right? ... although this is likely to be a
pointer traversal and locking nightmare ... hmmm.
Isn't that just (struct task_struct)parent == (struct
task_struct)child->parent (or ->real_parent)?
And now that I say that, it is covered by the following patch's child
check, so as long as we keep that, we should be fine.
I was thinking of checking not just current's immediate children, but
any of its descendants as I believe that is what we want to limit,
yes? I just worry that it isn't really practical to perform that
check.
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Post by Paul Moore
I ask because I suppose it might be possible for some container
runtime to do a fork, setup some of the environment and them exec the
container (before you answer the obvious "namespaces!" please remember
we're not trying to define containers).
I don't think namespaces have any bearing on this concern since none are
required.
Post by Paul Moore
Post by Richard Guy Briggs
+ /* Don't allow the containerid to be unset */
+ if (!cid_valid(containerid))
+ return -EINVAL;
+ /* if we don't have caps, reject */
+ if (!capable(CAP_AUDIT_CONTROL))
+ return -EPERM;
+ /* if containerid is unset, allow */
+ if (!audit_containerid_set(task))
+ return 0;
+ /* it is already set, and not inherited from the parent, reject */
+ ccontainerid = audit_get_containerid(task);
+ rcu_read_lock();
+ parent = rcu_dereference(task->real_parent);
+ rcu_read_unlock();
+ task_lock(parent);
+ pcontainerid = audit_get_containerid(parent);
+ task_unlock(parent);
+ if (ccontainerid != pcontainerid)
+ return -EPERM;
+ return 0;
I'm looking at the parent checks again and I wonder if the logic above
is what we really want. Maybe it is, but I'm not sure.

Things I'm wondering about:

* "ccontainerid" and "containerid" are too close in name, I kept
confusing myself when looking at this code. Please change one. Bonus
points if it is shorter.

* What if the orchestrator wants to move the task to a new container?
Right now it looks like you can only do that once, then the
task's audit container ID will no longer be the same as real_parent
... or does the orchestrator change that? *Can* the orchestrator
change real_parent (I suspect the answer is "no")?

* I think the key is the relationship between current and task, not
between task and task->real_parent. I believe what we really care
about is that task is a descendant of current. We might also want to
allow current to change the audit container ID if it holds
CAP_AUDIT_CONTROL, regardless of its relationship with task.
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
+static void audit_log_set_containerid(struct task_struct *task, u64 oldcontainerid,
+ u64 containerid, int rc)
+{
+ struct audit_buffer *ab;
+ uid_t uid;
+ struct tty_struct *tty;
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONTAINER);
+ if (!ab)
+ return;
+
+ uid = from_kuid(&init_user_ns, task_uid(current));
+ tty = audit_get_tty(current);
+
+ audit_log_format(ab, "op=set pid=%d uid=%u", task_tgid_nr(current), uid);
+ audit_log_task_context(ab);
+ audit_log_format(ab, " auid=%u tty=%s ses=%u opid=%d old-contid=%llu contid=%llu res=%d",
+ from_kuid(&init_user_ns, audit_get_loginuid(current)),
+ tty ? tty_name(tty) : "(none)", audit_get_sessionid(current),
+ task_tgid_nr(task), oldcontainerid, containerid, !rc);
+
+ audit_put_tty(tty);
+ audit_log_end(ab);
+}
+
+/**
+ * audit_set_containerid - set current task's audit_context containerid
+ *
+ * Returns 0 on success, -EPERM on permission failure.
+ *
+ * Called (set) from fs/proc/base.c::proc_containerid_write().
+ */
+int audit_set_containerid(struct task_struct *task, u64 containerid)
+{
+ u64 oldcontainerid;
+ int rc;
+
+ oldcontainerid = audit_get_containerid(task);
+
+ rc = audit_set_containerid_perm(task, containerid);
+ if (!rc) {
+ task_lock(task);
+ task->containerid = containerid;
+ task_unlock(task);
+ }
+
+ audit_log_set_containerid(task, oldcontainerid, containerid, rc);
+ return rc;
Why are audit_set_containerid_perm() and audit_log_containerid()
separate functions?
(I assume you mean audit_log_set_containerid()?)
Yep. My fingers got tired typing in that function name and decided a
shortcut was necessary.
Post by Richard Guy Briggs
It seemed clearer that all the permission checking was in one function
and its return code could be used to report the outcome when logging the
(attempted) action. This is the same structure as audit_set_loginuid()
and it made sense.
When possible I really like it when the permission checks are in the
same function as the code which does the work; it's less likely to get
abused that way (you have to willfully bypass the access checks). The
exceptions might be if you wanted to reuse the access control code, or
insert a modular access mechanism (e.g. LSMs).
I don't follow how it could be abused. The return code from the perm
check gates setting the value and is used in the success field in the
log.
If the permission checks are in the same function body as the code
which does the work you have to either split the function, or rewrite
it, if you want to bypass the permission checks. It may be more of a
style issue than an actual safety issue, but the comments about
single-use functions in the same scope is the tie breaker.
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-04-25 00:40:31 UTC
Permalink
Raw Message
Post by Paul Moore
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Implement the proc fs write to set the audit container ID of a process,
emitting an AUDIT_CONTAINER record to document the event.
This is a write from the container orchestrator task to a proc entry of
the form /proc/PID/containerid where PID is the process ID of the newly
created task that is to become the first task in a container, or an
additional task added to a container.
The write expects up to a u64 value (unset: 18446744073709551615).
type=CONTAINER msg=audit(1519903238.968:261): op=set pid=596 uid=0 subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 auid=0 tty=pts0 ses=1 opid=596 old-contid=18446744073709551615 contid=123455 res=0
The "op" field indicates an initial set. The "pid" to "ses" fields are
the orchestrator while the "opid" field is the object's PID, the process
being "contained". Old and new container ID values are given in the
"contid" fields, while res indicates its success.
It is not permitted to self-set, unset or re-set the container ID. A
child inherits its parent's container ID, but then can be set only once
after.
See: https://github.com/linux-audit/audit-kernel/issues/32
---
fs/proc/base.c | 37 ++++++++++++++++++++
include/linux/audit.h | 16 +++++++++
include/linux/init_task.h | 4 ++-
include/linux/sched.h | 1 +
include/uapi/linux/audit.h | 2 ++
kernel/auditsc.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 143 insertions(+), 1 deletion(-)
...
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
/* audit_rule_data supports filter rules with both integer and string
* fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4e0a4ac..29c8482 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2073,6 +2073,90 @@ int audit_set_loginuid(kuid_t loginuid)
return rc;
}
+static int audit_set_containerid_perm(struct task_struct *task, u64 containerid)
+{
+ struct task_struct *parent;
+ u64 pcontainerid, ccontainerid;
+
+ /* Don't allow to set our own containerid */
+ if (current == task)
+ return -EPERM;
Why not? Is there some obvious security concern that I missing?
We then lose the distinction in the AUDIT_CONTAINER record between the
initiating PID and the target PID. This was outlined in the proposal.
I just went back and reread the v3 proposal and I still don't see a
good explanation of this. Why is this bad? What's the security
concern?
I don't remember, specifically. Maybe this has been addressed by the
check for children/threads or identical parent container ID. So, I'm
reluctantly willing to remove that check for now.
Okay. For the record, if someone can explain to me why this
restriction saves us from some terrible situation I'm all for leaving
it. I'm just opposed to restrictions without solid reasoning behind
them.
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Having said that, I'm still not sure we have protected sufficiently from
a child turning around and setting it's parent's as yet unset or
inherited audit container ID.
Yes, I believe we only want to let a task set the audit container for
it's children (or itself/threads if we decide to allow that, see
above). There *has* to be a function to check to see if a task if a
child of a given task ... right? ... although this is likely to be a
pointer traversal and locking nightmare ... hmmm.
Isn't that just (struct task_struct)parent == (struct
task_struct)child->parent (or ->real_parent)?
And now that I say that, it is covered by the following patch's child
check, so as long as we keep that, we should be fine.
I was thinking of checking not just current's immediate children, but
any of it's descendants as I believe that is what we want to limit,
yes? I just worry that it isn't really practical to perform that
check.
The child check I'm talking about prevents setting a task's audit
container ID if it *has* any children or threads, so if it has children
it is automatically disqualified and its grandchildren are irrelevant.
Post by Paul Moore
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Post by Paul Moore
I ask because I suppose it might be possible for some container
runtime to do a fork, setup some of the environment and them exec the
container (before you answer the obvious "namespaces!" please remember
we're not trying to define containers).
I don't think namespaces have any bearing on this concern since none are
required.
Post by Paul Moore
Post by Richard Guy Briggs
+ /* Don't allow the containerid to be unset */
+ if (!cid_valid(containerid))
+ return -EINVAL;
+ /* if we don't have caps, reject */
+ if (!capable(CAP_AUDIT_CONTROL))
+ return -EPERM;
+ /* if containerid is unset, allow */
+ if (!audit_containerid_set(task))
+ return 0;
+ /* it is already set, and not inherited from the parent, reject */
+ ccontainerid = audit_get_containerid(task);
+ rcu_read_lock();
+ parent = rcu_dereference(task->real_parent);
+ rcu_read_unlock();
+ task_lock(parent);
+ pcontainerid = audit_get_containerid(parent);
+ task_unlock(parent);
+ if (ccontainerid != pcontainerid)
+ return -EPERM;
+ return 0;
I'm looking at the parent checks again and I wonder if the logic above
is what we really want. Maybe it is, but I'm not sure.
* "ccontainerid" and "containerid" are too close in name, I kept
confusing myself when looking at this code. Please change one. Bonus
points if it is shorter.
Would c_containerid and p_containerid be ok? child_cid and parent_cid?
I'd really like it to have the same root as the parameter handed in so
the code is easier to follow. It would be nice to have that across
caller to local, but that's challenging.

I've been tempted to use contid or even cid everywhere instead of
containerid. Perhaps the longer name doesn't bother me because I
like its uniqueness and I learned touch-typing in grade 9 and I like
100+ character wide terminals? ;-)
Post by Paul Moore
* What if the orchestrator wants to move the task to a new container?
Right now it looks like you can only do that once, then then the
task's audit container ID will no longer be the same as real_parent
A task's audit container ID can be unset or inherited, and then set
only once. After that, if you want it moved to a new container you
can't and your only option is to spawn another peer to that task or a
child of it and set that new task's audit container ID.

Currently, the method of detecting if its audit container ID has been
set (rather than inherited) was to check its parent's audit container
ID. The only reason to change this might be if the audit container ID
were not inheritable, but then we lose the accountability of a task
spawning another process and being able to leave its child's audit
container ID unset and unaccountable to any existing container. I think
the relationship to the parent is crucial, and if something wants to
change audit container ID it can, by spawning children and leaving a
trail of container IDs in its parent processes. (So what if a parent
dies?)
Post by Paul Moore
... or does the orchestrator change that? *Can* the orchestrator
change real_parent (I suspect the answer is "no")?
I don't think the orchestrator is able to change real_parent. I've
forgotten why there is a ->parent and ->real_parent and how they can
change. One is for the wait signal. I don't remember the purpose of
the other.

If the parent dies before the child, the child will be re-parented on
its grandparent if the parent doesn't hang around zombified, if I
understand correctly. If anything, a parent dying would likely further
restrict the ability to set a task's audit container ID because a parent
with an identical ID could vanish.
Post by Paul Moore
* I think the key is the relationship between current and task, not
between task and task->real_parent. I believe what we really care
about is that task is a descendant of current. We might also want to
allow current to change the audit container ID if it holds
CAP_AUDIT_CONTROL, regardless of it's relationship with task.
Currently, a process with CAP_AUDIT_CONTROL can set the audit container
ID of any task that hasn't got children or threads, isn't itself, and
its audit container ID is inherited or unset. This was to try to
prevent games with parents and children scratching each other's backs.

I would feel more comfortable if only descendants were settable, so
adding that restriction sounds like a good idea to me other than the
tree-climbing exercise and overhead involved.
Post by Paul Moore
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
+static void audit_log_set_containerid(struct task_struct *task, u64 oldcontainerid,
+ u64 containerid, int rc)
+{
+ struct audit_buffer *ab;
+ uid_t uid;
+ struct tty_struct *tty;
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONTAINER);
+ if (!ab)
+ return;
+
+ uid = from_kuid(&init_user_ns, task_uid(current));
+ tty = audit_get_tty(current);
+
+ audit_log_format(ab, "op=set pid=%d uid=%u", task_tgid_nr(current), uid);
+ audit_log_task_context(ab);
+ audit_log_format(ab, " auid=%u tty=%s ses=%u opid=%d old-contid=%llu contid=%llu res=%d",
+ from_kuid(&init_user_ns, audit_get_loginuid(current)),
+ tty ? tty_name(tty) : "(none)", audit_get_sessionid(current),
+ task_tgid_nr(task), oldcontainerid, containerid, !rc);
+
+ audit_put_tty(tty);
+ audit_log_end(ab);
+}
+
+/**
+ * audit_set_containerid - set current task's audit_context containerid
+ *
+ * Returns 0 on success, -EPERM on permission failure.
+ *
+ * Called (set) from fs/proc/base.c::proc_containerid_write().
+ */
+int audit_set_containerid(struct task_struct *task, u64 containerid)
+{
+ u64 oldcontainerid;
+ int rc;
+
+ oldcontainerid = audit_get_containerid(task);
+
+ rc = audit_set_containerid_perm(task, containerid);
+ if (!rc) {
+ task_lock(task);
+ task->containerid = containerid;
+ task_unlock(task);
+ }
+
+ audit_log_set_containerid(task, oldcontainerid, containerid, rc);
+ return rc;
Why are audit_set_containerid_perm() and audit_log_containerid()
separate functions?
(I assume you mean audit_log_set_containerid()?)
Yep. My fingers got tired typing in that function name and decided a
shortcut was necessary.
Post by Richard Guy Briggs
It seemed clearer that all the permission checking was in one function
and its return code could be used to report the outcome when logging the
(attempted) action. This is the same structure as audit_set_loginuid()
and it made sense.
When possible I really like it when the permission checks are in the
same function as the code which does the work; it's less likely to get
abused that way (you have to willfully bypass the access checks). The
exceptions might be if you wanted to reuse the access control code, or
insert a modular access mechanism (e.g. LSMs).
I don't follow how it could be abused. The return code from the perm
check gates setting the value and is used in the success field in the
log.
If the permission checks are in the same function body as the code
which does the work you have to either split the function, or rewrite
it, if you want to bypass the permission checks. It may be more of a
style issue than an actual safety issue, but the comments about
single-use functions in the same scope is the tie breaker.
Perhaps I'm just being quite dense, but I just don't follow what the
problem is and how you suggest fixing it. A bunch of gotos to a label
such as "out:" to log the refused action? That seems messy and
unstructured.
Post by Paul Moore
paul moore
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Paul Moore
2018-04-26 22:47:45 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Implement the proc fs write to set the audit container ID of a process,
emitting an AUDIT_CONTAINER record to document the event.
This is a write from the container orchestrator task to a proc entry of
the form /proc/PID/containerid where PID is the process ID of the newly
created task that is to become the first task in a container, or an
additional task added to a container.
The write expects up to a u64 value (unset: 18446744073709551615).
type=CONTAINER msg=audit(1519903238.968:261): op=set pid=596 uid=0 subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 auid=0 tty=pts0 ses=1 opid=596 old-contid=18446744073709551615 contid=123455 res=0
The "op" field indicates an initial set. The "pid" to "ses" fields are
the orchestrator while the "opid" field is the object's PID, the process
being "contained". Old and new container ID values are given in the
"contid" fields, while res indicates its success.
It is not permitted to self-set, unset or re-set the container ID. A
child inherits its parent's container ID, but then can be set only once
after.
See: https://github.com/linux-audit/audit-kernel/issues/32
---
fs/proc/base.c | 37 ++++++++++++++++++++
include/linux/audit.h | 16 +++++++++
include/linux/init_task.h | 4 ++-
include/linux/sched.h | 1 +
include/uapi/linux/audit.h | 2 ++
kernel/auditsc.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 143 insertions(+), 1 deletion(-)
...
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
/* audit_rule_data supports filter rules with both integer and string
* fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4e0a4ac..29c8482 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2073,6 +2073,90 @@ int audit_set_loginuid(kuid_t loginuid)
return rc;
}
+static int audit_set_containerid_perm(struct task_struct *task, u64 containerid)
+{
+ struct task_struct *parent;
+ u64 pcontainerid, ccontainerid;
+
+ /* Don't allow to set our own containerid */
+ if (current == task)
+ return -EPERM;
Why not? Is there some obvious security concern that I am missing?
We then lose the distinction in the AUDIT_CONTAINER record between the
initiating PID and the target PID. This was outlined in the proposal.
I just went back and reread the v3 proposal and I still don't see a
good explanation of this. Why is this bad? What's the security
concern?
I don't remember, specifically. Maybe this has been addressed by the
check for children/threads or identical parent container ID. So, I'm
reluctantly willing to remove that check for now.
Okay. For the record, if someone can explain to me why this
restriction saves us from some terrible situation I'm all for leaving
it. I'm just opposed to restrictions without solid reasoning behind
them.
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Having said that, I'm still not sure we have protected sufficiently from
a child turning around and setting its parent's as yet unset or
inherited audit container ID.
Yes, I believe we only want to let a task set the audit container for
its children (or itself/threads if we decide to allow that, see
above). There *has* to be a function to check to see if a task is a
child of a given task ... right? ... although this is likely to be a
pointer traversal and locking nightmare ... hmmm.
Isn't that just (struct task_struct)parent == (struct
task_struct)child->parent (or ->real_parent)?
And now that I say that, it is covered by the following patch's child
check, so as long as we keep that, we should be fine.
I was thinking of checking not just current's immediate children, but
any of its descendants as I believe that is what we want to limit,
yes? I just worry that it isn't really practical to perform that
check.
The child check I'm talking about prevents setting a task's audit
container ID if it *has* any children or threads, so if it has children
it is automatically disqualified and its grandchildren are irrelevant.
Post by Paul Moore
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Post by Paul Moore
I ask because I suppose it might be possible for some container
runtime to do a fork, set up some of the environment and then exec the
container (before you answer the obvious "namespaces!" please remember
we're not trying to define containers).
I don't think namespaces have any bearing on this concern since none are
required.
Post by Paul Moore
Post by Richard Guy Briggs
+ /* Don't allow the containerid to be unset */
+ if (!cid_valid(containerid))
+ return -EINVAL;
+ /* if we don't have caps, reject */
+ if (!capable(CAP_AUDIT_CONTROL))
+ return -EPERM;
+ /* if containerid is unset, allow */
+ if (!audit_containerid_set(task))
+ return 0;
+ /* it is already set, and not inherited from the parent, reject */
+ ccontainerid = audit_get_containerid(task);
+ rcu_read_lock();
+ parent = rcu_dereference(task->real_parent);
+ rcu_read_unlock();
+ task_lock(parent);
+ pcontainerid = audit_get_containerid(parent);
+ task_unlock(parent);
+ if (ccontainerid != pcontainerid)
+ return -EPERM;
+ return 0;
I'm looking at the parent checks again and I wonder if the logic above
is what we really want. Maybe it is, but I'm not sure.
* "ccontainerid" and "containerid" are too close in name, I kept
confusing myself when looking at this code. Please change one. Bonus
points if it is shorter.
Would c_containerid and p_containerid be ok? child_cid and parent_cid?
Either would be an improvement over ccontainerid/containerid. I would
give a slight nod to child_cid/parent_cid just for length reasons.
Post by Richard Guy Briggs
I'd really like it to have the same root as the parameter handed in so
the code is easier to follow. It would be nice to have that across
caller to local, but that's challenging.
That's fine, but you have to admit that ccontainerid/containerid is
awkward and not easy to quickly differentiate :)
Post by Richard Guy Briggs
I've been tempted to use contid or even cid everywhere instead of
containerid. Perhaps the longer name doesn't bother me because I
like its uniqueness and I learned touch-typing in grade 9 and I like
100+ character wide terminals? ;-)
I would definitely appreciate contid/cid or similar, but I don't care
too much either way. As far as terminal width is concerned, please
make sure your code fits in 80 char terminals.
Post by Richard Guy Briggs
Post by Paul Moore
* What if the orchestrator wants to move the task to a new container?
Right now it looks like you can only do that once, then the
task's audit container ID will no longer be the same as real_parent
A task's audit container ID can be unset or inherited, and then set
only once. After that, if you want it moved to a new container you
can't and your only option is to spawn another peer to that task or a
child of it and set that new task's audit container ID.
Okay. We've had so many discussions about this both on and off list
that I lose track on where we stand for certain things. I think
preventing task movement is fine for the initial effort so long as we
don't prevent adding it in the future; I don't see anything (other
than the permission checks under discussion, which is fine) preventing
this.
Post by Richard Guy Briggs
Currently, the method of detecting if its audit container ID has been
set (rather than inherited) was to check its parent's audit container
ID.
Yeah ... those are two different things. I've been wondering if we
should introduce a set/inherited flag as simply checking the parent
task's audit container ID isn't quite the same; although it may be
"close enough" that it doesn't matter in practice. However, I'm
beginning to think this parent/child relationship isn't really
important beyond the inheritance issue ... more on this below.
Post by Richard Guy Briggs
The only reason to change this might be if the audit container ID
were not inheritable, but then we lose the accountability of a task
spawning another process and being able to leave its child's audit
container ID unset and unaccountable to any existing container. I think
the relationship to the parent is crucial, and if something wants to
change audit container ID it can, by spawning children and leaving a
trail of container IDs in its parent processes. (So what if a parent
dies?)
The audit container ID *must* be inherited, I don't really think
anyone is questioning that. What I'm wondering about is what we
accomplish by comparing the child's and parent's audit container ID?

I've thought about this a bit more and I think we are making this way
too complicated right now. We basically have three rules for the
audit container ID which we need to follow:

1. Children inherit their parent's audit container ID; this includes
the magic "unset" audit container ID.
2. You can't change the audit container ID once set.
3. In order to set the audit container ID of a process you must have
CAP_AUDIT_CONTROL.

With that in mind, I think the permission checks would be something like this:

[SIDE NOTE: Audit Container ID in acronym form works out to "acid" ;) ]

int perm(task, acid)
{
if (!task || !valid(acid))
return -EINVAL;
if (!capable(CAP_AUDIT_CONTROL))
return -EPERM;
if (task->acid != UNSET)
return -EPERM;
return 0;
}
Post by Richard Guy Briggs
Post by Paul Moore
... or does the orchestrator change that? *Can* the orchestrator
change real_parent (I suspect the answer is "no")?
I don't think the orchestrator is able to change real_parent.
I didn't think so either, but I didn't do an exhaustive check.
Post by Richard Guy Briggs
I've forgotten why there is a ->parent and ->real_parent and how they can
change. One is for the wait signal. I don't remember the purpose of
the other.
I know ptrace makes use of real_parent when re-parenting the process
being ptrace'd.
Post by Richard Guy Briggs
If the parent dies before the child, the child will be re-parented on
its grandparent if the parent doesn't hang around zombified, if I
understand correctly. If anything, a parent dying would likely further
restrict the ability to set a task's audit container ID because a parent
with an identical ID could vanish.
All the more reason to go with the simplified approach above. I think
the parent/child relationship is a bit of a distraction and a
complexity that isn't important (except for the inheritance of
course).
Post by Richard Guy Briggs
Post by Paul Moore
* I think the key is the relationship between current and task, not
between task and task->real_parent. I believe what we really care
about is that task is a descendant of current. We might also want to
allow current to change the audit container ID if it holds
CAP_AUDIT_CONTROL, regardless of its relationship with task.
Currently, a process with CAP_AUDIT_CONTROL can set the audit container
ID of any task that hasn't got children or threads, isn't itself, and
its audit container ID is inherited or unset. This was to try to
prevent games with parents and children scratching each other's backs.
I would feel more comfortable if only descendants were settable, so
adding that restriction sounds like a good idea to me other than the
tree-climbing exercise and overhead involved.
Post by Paul Moore
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
Post by Paul Moore
Post by Richard Guy Briggs
+static void audit_log_set_containerid(struct task_struct *task, u64 oldcontainerid,
+ u64 containerid, int rc)
+{
+ struct audit_buffer *ab;
+ uid_t uid;
+ struct tty_struct *tty;
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONTAINER);
+ if (!ab)
+ return;
+
+ uid = from_kuid(&init_user_ns, task_uid(current));
+ tty = audit_get_tty(current);
+
+ audit_log_format(ab, "op=set pid=%d uid=%u", task_tgid_nr(current), uid);
+ audit_log_task_context(ab);
+ audit_log_format(ab, " auid=%u tty=%s ses=%u opid=%d old-contid=%llu contid=%llu res=%d",
+ from_kuid(&init_user_ns, audit_get_loginuid(current)),
+ tty ? tty_name(tty) : "(none)", audit_get_sessionid(current),
+ task_tgid_nr(task), oldcontainerid, containerid, !rc);
+
+ audit_put_tty(tty);
+ audit_log_end(ab);
+}
+
+/**
+ * audit_set_containerid - set current task's audit_context containerid
+ *
+ * Returns 0 on success, -EPERM on permission failure.
+ *
+ * Called (set) from fs/proc/base.c::proc_containerid_write().
+ */
+int audit_set_containerid(struct task_struct *task, u64 containerid)
+{
+ u64 oldcontainerid;
+ int rc;
+
+ oldcontainerid = audit_get_containerid(task);
+
+ rc = audit_set_containerid_perm(task, containerid);
+ if (!rc) {
+ task_lock(task);
+ task->containerid = containerid;
+ task_unlock(task);
+ }
+
+ audit_log_set_containerid(task, oldcontainerid, containerid, rc);
+ return rc;
Why are audit_set_containerid_perm() and audit_log_containerid()
separate functions?
(I assume you mean audit_log_set_containerid()?)
Yep. My fingers got tired typing in that function name and decided a
shortcut was necessary.
Post by Richard Guy Briggs
It seemed clearer that all the permission checking was in one function
and its return code could be used to report the outcome when logging the
(attempted) action. This is the same structure as audit_set_loginuid()
and it made sense.
When possible I really like it when the permission checks are in the
same function as the code which does the work; it's less likely to get
abused that way (you have to willfully bypass the access checks). The
exceptions might be if you wanted to reuse the access control code, or
insert a modular access mechanism (e.g. LSMs).
I don't follow how it could be abused. The return code from the perm
check gates setting the value and is used in the success field in the
log.
If the permission checks are in the same function body as the code
which does the work you have to either split the function, or rewrite
it, if you want to bypass the permission checks. It may be more of a
style issue than an actual safety issue, but the comments about
single-use functions in the same scope is the tie breaker.
Perhaps I'm just being quite dense, but I just don't follow what the
problem is and how you suggest fixing it. A bunch of gotos to a label
such as "out:" to log the refused action? That seems messy and
unstructured.
Fold audit_set_containerid_perm() and audit_log_set_containerid() into
their only caller, audit_set_containerid().
--
paul moore
www.paul-moore.com
Richard Guy Briggs
2018-05-06 16:51:30 UTC
Permalink
Raw Message
Post by Paul Moore
Post by Richard Guy Briggs
Implement the proc fs write to set the audit container ID of a process,
emitting an AUDIT_CONTAINER record to document the event.
This is a write from the container orchestrator task to a proc entry of
the form /proc/PID/containerid where PID is the process ID of the newly
created task that is to become the first task in a container, or an
additional task added to a container.
The write expects up to a u64 value (unset: 18446744073709551615).
type=CONTAINER msg=audit(1519903238.968:261): op=set pid=596 uid=0 subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 auid=0 tty=pts0 ses=1 opid=596 old-contid=18446744073709551615 contid=123455 res=0
The "op" field indicates an initial set. The "pid" to "ses" fields are
the orchestrator while the "opid" field is the object's PID, the process
being "contained". Old and new container ID values are given in the
"contid" fields, while res indicates its success.
It is not permitted to self-set, unset or re-set the container ID. A
child inherits its parent's container ID, but then can be set only once
after.
See: https://github.com/linux-audit/audit-kernel/issues/32
---
fs/proc/base.c | 37 ++++++++++++++++++++
include/linux/audit.h | 16 +++++++++
include/linux/init_task.h | 4 ++-
include/linux/sched.h | 1 +
include/uapi/linux/audit.h | 2 ++
kernel/auditsc.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 143 insertions(+), 1 deletion(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 60316b5..6ce4fbe 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1299,6 +1299,41 @@ static ssize_t proc_sessionid_read(struct file * file, char __user * buf,
.read = proc_sessionid_read,
.llseek = generic_file_llseek,
};
+
+static ssize_t proc_containerid_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode *inode = file_inode(file);
+ u64 containerid;
+ int rv;
+ struct task_struct *task = get_proc_task(inode);
+
+ if (!task)
+ return -ESRCH;
+ if (*ppos != 0) {
+ /* No partial writes. */
+ put_task_struct(task);
+ return -EINVAL;
+ }
+
+ rv = kstrtou64_from_user(buf, count, 10, &containerid);
+ if (rv < 0) {
+ put_task_struct(task);
+ return rv;
+ }
+
+ rv = audit_set_containerid(task, containerid);
+ put_task_struct(task);
+ if (rv < 0)
+ return rv;
+ return count;
+}
+
+static const struct file_operations proc_containerid_operations = {
+ .write = proc_containerid_write,
+ .llseek = generic_file_llseek,
+};
+
#endif
#ifdef CONFIG_FAULT_INJECTION
@@ -2961,6 +2996,7 @@ static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns,
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
@@ -3355,6 +3391,7 @@ static int proc_tid_comm_permission(struct inode *inode, int mask)
#ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
...
Post by Paul Moore
Post by Richard Guy Briggs
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d258826..1b82191 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -796,6 +796,7 @@ struct task_struct {
#ifdef CONFIG_AUDITSYSCALL
kuid_t loginuid;
unsigned int sessionid;
+ u64 containerid;
This one line addition to the task_struct scares me the most of
anything in this patchset. Why? It's a field named "containerid" in
perhaps one of the most widely used core kernel structures; the
possibilities for abuse are endless, and it's foolish to think we
would ever be able to adequately police this.
Unfortunately, we can't add the field to audit_context as things
currently stand because we don't always allocate an audit_context,
it's dependent on the system's configuration, and we need to track the
audit container ID for a given process, regardless of the audit
configuration. Pretty much the same reason why loginuid and sessionid
are located directly in task_struct now. As I stressed during the
design phase, I really want to keep this as an *audit* container ID
and not a general purpose kernel wide container ID. If the kernel
ever grows a general purpose container ID token, I'll be the first in
line to convert the audit code, but I don't want audit to be that
general purpose mechanism ... audit is hated enough as-is ;)
I think the right solution to this is to create another new struct,
audit_task_info (or similar, the name really isn't that important),
which would be stored as a pointer in task_struct and would replace
the audit_context pointer, loginuid, sessionid, and the newly proposed
containerid. The new audit_task_info would always be allocated in the
audit_alloc() function (please use kmem_cache), and the audit_context
pointer included inside would continue to be allocated based on the
existing conditions. By keeping audit_task_info as a pointer inside
task_struct we could hide the structure definition inside
kernel/audit*.c and make it much more difficult for other subsystems
to abuse it.[1]
struct audit_task_info {
kuid_t loginuid;
unsigned int sessionid;
u64 containerid;
struct audit_context *ctx;
}
Actually, we might even want to consider storing audit_context in
audit_task_info (no pointer), or making it a zero length array
(ctx[0]) and going with a variable sized allocation of audit_task_info
... but all that could be done as a follow up optimization once we get
the basic idea sorted.
I tried statically allocating struct audit_task_info (with a pointer to
struct audit_context) in addition to dynamically allocating struct
audit_task_info due to a bug I'd introduced while dynamically allocating
audit_task_info, so I now have proof-of-concepts for working static and
almost working dynamic allocated struct audit_task_info.

Statically allocating it required a new header file, so I'm not that
crazy about it, but it proved it works.

Dynamically allocating it isn't quite as clean as was hoped since
init/init_task.c still needs initialization values for loginuid and
sessionid which could be supplied by a statically allocated struct
audit_task_info and still needs to know the internals of that struct to
do so. Dynamic allocation is also more disruptive initially, but in the
long run will be more stable to the rest of the kernel.

I'm not crazy about the idea of dynamically (or even statically)
allocating struct audit_task_info which includes allocated space for
struct audit_context since the latter is far larger than the former.
Post by Paul Moore
[1] If for some reason allocating audit_task_info becomes too much
overhead to bear (somewhat doubtful since we would only do it at task
creation), we could do some ugly tricks to directly include an
audit_task_struct chunk in task_struct but I'd like to avoid that if
possible (and I think we can).
On allocation, I don't see too much of a problem. When calling
audit_free() if there is no audit context it is pretty lightweight, but
gets heavier if we eliminate the inline audit_free() and rename
__audit_free() back to audit_free(). Having struct audit_task_info
directly in struct task_struct would be faster and also allow defaults
to be set in init/init_task.c (which has recently been populated from
include/linux/init_task.h). I'm not sure this is enough of a reason to
avoid a pointer from task_struct.

(As an aside, converting allocation of audit_context could also benefit
from kmem_cache... and maybe even struct audit_names)
Post by Paul Moore
Post by Richard Guy Briggs
#endif
struct seccomp seccomp;
...
paul moore
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Steve Grubb
2018-05-17 21:00:53 UTC
Permalink
Raw Message
On Fri, 16 Mar 2018 05:00:28 -0400
Post by Richard Guy Briggs
Implement the proc fs write to set the audit container ID of a
process, emitting an AUDIT_CONTAINER record to document the event.
This is a write from the container orchestrator task to a proc entry
of the form /proc/PID/containerid where PID is the process ID of the
newly created task that is to become the first task in a container,
or an additional task added to a container.
The write expects up to a u64 value (unset: 18446744073709551615).
type=CONTAINER msg=audit(1519903238.968:261): op=set pid=596 uid=0
subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 auid=0
tty=pts0 ses=1 opid=596 old-contid=18446744073709551615 contid=123455
res=0
There was one thing I was wondering about. Currently when we set the
loginuid, the record is AUDIT_LOGINUID. The corollary is that when we
set the container id, the event should be AUDIT_CONTAINERID or
AUDIT_CONTAINER_ID.

During syscall events, the path info is returned in a record simply
called AUDIT_PATH, cwd info is returned in AUDIT_CWD. So, rather than
calling the record that gets attached to everything
AUDIT_CONTAINER_INFO, how about simply AUDIT_CONTAINER.
Post by Richard Guy Briggs
The "op" field indicates an initial set. The "pid" to "ses" fields
are the orchestrator while the "opid" field is the object's PID, the
process being "contained". Old and new container ID values are given
in the "contid" fields, while res indicates its success.
It is not permitted to self-set, unset or re-set the container ID. A
child inherits its parent's container ID, but then can be set only
once after.
See: https://github.com/linux-audit/audit-kernel/issues/32
---
fs/proc/base.c | 37 ++++++++++++++++++++
include/linux/audit.h | 16 +++++++++
include/linux/init_task.h | 4 ++-
include/linux/sched.h | 1 +
include/uapi/linux/audit.h | 2 ++
kernel/auditsc.c | 84
++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 143
insertions(+), 1 deletion(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 60316b5..6ce4fbe 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1299,6 +1299,41 @@ static ssize_t proc_sessionid_read(struct file
* file, char __user * buf, .read = proc_sessionid_read,
.llseek = generic_file_llseek,
};
+
+static ssize_t proc_containerid_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode *inode = file_inode(file);
+ u64 containerid;
+ int rv;
+ struct task_struct *task = get_proc_task(inode);
+
+ if (!task)
+ return -ESRCH;
+ if (*ppos != 0) {
+ /* No partial writes. */
+ put_task_struct(task);
+ return -EINVAL;
+ }
+
+ rv = kstrtou64_from_user(buf, count, 10, &containerid);
+ if (rv < 0) {
+ put_task_struct(task);
+ return rv;
+ }
+
+ rv = audit_set_containerid(task, containerid);
+ put_task_struct(task);
+ if (rv < 0)
+ return rv;
+ return count;
+}
+
+static const struct file_operations proc_containerid_operations = {
+ .write = proc_containerid_write,
+ .llseek = generic_file_llseek,
+};
+
#endif
#ifdef CONFIG_FAULT_INJECTION
@@ -2961,6 +2996,7 @@ static int proc_pid_patch_state(struct seq_file
*m, struct pid_namespace *ns, #ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR,
proc_tid_comm_permission(struct inode *inode, int mask) #ifdef
CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO,
proc_loginuid_operations), REG("sessionid", S_IRUGO,
proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR,
proc_fault_inject_operations), diff --git a/include/linux/audit.h
b/include/linux/audit.h index af410d9..fe4ba3f 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -29,6 +29,7 @@
#define AUDIT_INO_UNSET ((unsigned long)-1)
#define AUDIT_DEV_UNSET ((dev_t)-1)
+#define INVALID_CID AUDIT_CID_UNSET
struct audit_sig_info {
uid_t uid;
@@ -321,6 +322,7 @@ static inline void audit_ptrace(struct
task_struct *t) extern int auditsc_get_stamp(struct audit_context
*ctx, struct timespec64 *t, unsigned int *serial);
extern int audit_set_loginuid(kuid_t loginuid);
+extern int audit_set_containerid(struct task_struct *tsk, u64
containerid);
static inline kuid_t audit_get_loginuid(struct task_struct *tsk)
{
@@ -332,6 +334,11 @@ static inline unsigned int
audit_get_sessionid(struct task_struct *tsk) return tsk->sessionid;
}
+static inline u64 audit_get_containerid(struct task_struct *tsk)
+{
+ return tsk->containerid;
+}
+
extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid,
gid_t gid, umode_t mode); extern void __audit_bprm(struct
audit_get_sessionid(struct task_struct *tsk) {
return -1;
}
+static inline kuid_t audit_get_containerid(struct task_struct *tsk)
+{
+ return INVALID_CID;
+}
static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
{ }
static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t
audit_loginuid_set(struct task_struct *tsk) return
uid_valid(audit_get_loginuid(tsk)); }
+static inline bool audit_containerid_set(struct task_struct *tsk)
+{
+ return audit_get_containerid(tsk) != INVALID_CID;
+}
+
static inline void audit_log_string(struct audit_buffer *ab, const
char *buf) {
audit_log_n_string(ab, buf, strlen(buf));
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 6a53262..046bd0a 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -18,6 +18,7 @@
#include <linux/sched/rt.h>
#include <linux/livepatch.h>
#include <linux/mm_types.h>
+#include <linux/audit.h>
#include <asm/thread_info.h>
@@ -120,7 +121,8 @@
#ifdef CONFIG_AUDITSYSCALL
#define INIT_IDS \
.loginuid = INVALID_UID, \
- .sessionid = (unsigned int)-1,
+ .sessionid = (unsigned int)-1, \
+ .containerid = INVALID_CID,
#else
#define INIT_IDS
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d258826..1b82191 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -796,6 +796,7 @@ struct task_struct {
#ifdef CONFIG_AUDITSYSCALL
kuid_t loginuid;
unsigned int sessionid;
+ u64 containerid;
#endif
struct seccomp seccomp;
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 4e61a9e..921a71f 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -71,6 +71,7 @@
#define AUDIT_TTY_SET 1017 /* Set TTY auditing
status */ #define AUDIT_SET_FEATURE 1018 /* Turn an
audit feature on or off */ #define AUDIT_GET_FEATURE
1019 /* Get which features are enabled */ +#define
AUDIT_CONTAINER 1020 /* Define the container id
and information */ #define AUDIT_FIRST_USER_MSG 1100 /*
Userspace messages mostly uninteresting to kernel */ #define
AUDIT_USER_AVC 1107 /* We filter this
#define AUDIT_UID_UNSET (unsigned int)-1
+#define AUDIT_CID_UNSET ((u64)-1)
/* audit_rule_data supports filter rules with both integer and string
* fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4e0a4ac..29c8482 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2073,6 +2073,90 @@ int audit_set_loginuid(kuid_t loginuid)
return rc;
}
+static int audit_set_containerid_perm(struct task_struct *task, u64
containerid) +{
+ struct task_struct *parent;
+ u64 pcontainerid, ccontainerid;
+
+ /* Don't allow to set our own containerid */
+ if (current == task)
+ return -EPERM;
+ /* Don't allow the containerid to be unset */
+ if (!cid_valid(containerid))
+ return -EINVAL;
+ /* if we don't have caps, reject */
+ if (!capable(CAP_AUDIT_CONTROL))
+ return -EPERM;
+ /* if containerid is unset, allow */
+ if (!audit_containerid_set(task))
+ return 0;
+ /* it is already set, and not inherited from the parent,
reject */
+ ccontainerid = audit_get_containerid(task);
+ rcu_read_lock();
+ parent = rcu_dereference(task->real_parent);
+ rcu_read_unlock();
+ task_lock(parent);
+ pcontainerid = audit_get_containerid(parent);
+ task_unlock(parent);
+ if (ccontainerid != pcontainerid)
+ return -EPERM;
+ return 0;
+}
+
+static void audit_log_set_containerid(struct task_struct *task, u64 oldcontainerid,
+ u64 containerid, int rc)
+{
+ struct audit_buffer *ab;
+ uid_t uid;
+ struct tty_struct *tty;
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONTAINER);
+ if (!ab)
+ return;
+
+ uid = from_kuid(&init_user_ns, task_uid(current));
+ tty = audit_get_tty(current);
+
+ audit_log_format(ab, "op=set pid=%d uid=%u",
task_tgid_nr(current), uid);
+ audit_log_task_context(ab);
+ audit_log_format(ab, " auid=%u tty=%s ses=%u opid=%d
old-contid=%llu contid=%llu res=%d",
The preferred ordering would be: op, opid, old-contid, contid, pid, uid,
tty, ses, subj, comm, exe, res. This groups the searchable fields
together using the most common ordering so that parsing is simple.

Thanks,
-Steve
Post by Richard Guy Briggs
+ from_kuid(&init_user_ns,
audit_get_loginuid(current)),
+ tty ? tty_name(tty) : "(none)",
audit_get_sessionid(current),
+ task_tgid_nr(task), oldcontainerid,
containerid, !rc); +
+ audit_put_tty(tty);
+ audit_log_end(ab);
+}
+
+/**
+ * audit_set_containerid - set current task's audit_context
containerid
+ *
+ * Returns 0 on success, -EPERM on permission failure.
+ *
+ * Called (set) from fs/proc/base.c::proc_containerid_write().
+ */
+int audit_set_containerid(struct task_struct *task, u64 containerid)
+{
+ u64 oldcontainerid;
+ int rc;
+
+ oldcontainerid = audit_get_containerid(task);
+
+ rc = audit_set_containerid_perm(task, containerid);
+ if (!rc) {
+ task_lock(task);
+ task->containerid = containerid;
+ task_unlock(task);
+ }
+
+ audit_log_set_containerid(task, oldcontainerid, containerid,
rc);
+ return rc;
+}
+
/**
* __audit_mq_open - record audit data for a POSIX MQ open
Richard Guy Briggs
2018-05-17 21:56:00 UTC
Permalink
Raw Message
On Fri, 16 Mar 2018 05:00:28 -0400
Post by Richard Guy Briggs
Implement the proc fs write to set the audit container ID of a
process, emitting an AUDIT_CONTAINER record to document the event.
This is a write from the container orchestrator task to a proc entry
of the form /proc/PID/containerid where PID is the process ID of the
newly created task that is to become the first task in a container,
or an additional task added to a container.
The write expects up to a u64 value (unset: 18446744073709551615).
type=CONTAINER msg=audit(1519903238.968:261): op=set pid=596 uid=0
subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 auid=0
tty=pts0 ses=1 opid=596 old-contid=18446744073709551615 contid=123455
res=0
There was one thing I was wondering about. Currently when we set the
loginuid, the record is AUDIT_LOGINUID. The corollary is that when we
set the container id, the event should be AUDIT_CONTAINERID or
AUDIT_CONTAINER_ID.
The record type is actually AUDIT_LOGIN. The field type is
AUDIT_LOGINUID. Given that correction, I think we're fine and could
potentially violently agree. The existing naming is consistent.
During syscall events, the path info is returned in a record simply
called AUDIT_PATH, cwd info is returned in AUDIT_CWD. So, rather than
calling the record that gets attached to everything
AUDIT_CONTAINER_INFO, how about simply AUDIT_CONTAINER.
Considering the container initiation record is different than the record
to document the container involved in an otherwise normal syscall, we
need two names. I don't have a strong opinion what they are.

I'd prefer AUDIT_CONTAINER and AUDIT_CONTAINER_INFO so that the two are
different enough to be visually distinct while leaving
AUDIT_CONTAINERID for the field type in patch 4 ("audit: add containerid
filtering")
Post by Richard Guy Briggs
The "op" field indicates an initial set. The "pid" to "ses" fields
are the orchestrator while the "opid" field is the object's PID, the
process being "contained". Old and new container ID values are given
in the "contid" fields, while res indicates its success.
It is not permitted to self-set, unset or re-set the container ID. A
child inherits its parent's container ID, but then can be set only
once after.
See: https://github.com/linux-audit/audit-kernel/issues/32
---
fs/proc/base.c | 37 ++++++++++++++++++++
include/linux/audit.h | 16 +++++++++
include/linux/init_task.h | 4 ++-
include/linux/sched.h | 1 +
include/uapi/linux/audit.h | 2 ++
kernel/auditsc.c | 84
++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 143
insertions(+), 1 deletion(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 60316b5..6ce4fbe 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1299,6 +1299,41 @@ static ssize_t proc_sessionid_read(struct file
* file, char __user * buf, .read = proc_sessionid_read,
.llseek = generic_file_llseek,
};
+
+static ssize_t proc_containerid_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode *inode = file_inode(file);
+ u64 containerid;
+ int rv;
+ struct task_struct *task = get_proc_task(inode);
+
+ if (!task)
+ return -ESRCH;
+ if (*ppos != 0) {
+ /* No partial writes. */
+ put_task_struct(task);
+ return -EINVAL;
+ }
+
+ rv = kstrtou64_from_user(buf, count, 10, &containerid);
+ if (rv < 0) {
+ put_task_struct(task);
+ return rv;
+ }
+
+ rv = audit_set_containerid(task, containerid);
+ put_task_struct(task);
+ if (rv < 0)
+ return rv;
+ return count;
+}
+
+static const struct file_operations proc_containerid_operations = {
+ .write = proc_containerid_write,
+ .llseek = generic_file_llseek,
+};
+
#endif
#ifdef CONFIG_FAULT_INJECTION
@@ -2961,6 +2996,7 @@ static int proc_pid_patch_state(struct seq_file
*m, struct pid_namespace *ns, #ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR,
proc_tid_comm_permission(struct inode *inode, int mask) #ifdef
CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO,
proc_loginuid_operations), REG("sessionid", S_IRUGO,
proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR,
proc_fault_inject_operations), diff --git a/include/linux/audit.h
b/include/linux/audit.h index af410d9..fe4ba3f 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -29,6 +29,7 @@
#define AUDIT_INO_UNSET ((unsigned long)-1)
#define AUDIT_DEV_UNSET ((dev_t)-1)
+#define INVALID_CID AUDIT_CID_UNSET
struct audit_sig_info {
uid_t uid;
@@ -321,6 +322,7 @@ static inline void audit_ptrace(struct
task_struct *t) extern int auditsc_get_stamp(struct audit_context
*ctx, struct timespec64 *t, unsigned int *serial);
extern int audit_set_loginuid(kuid_t loginuid);
+extern int audit_set_containerid(struct task_struct *tsk, u64 containerid);
static inline kuid_t audit_get_loginuid(struct task_struct *tsk)
{
@@ -332,6 +334,11 @@ static inline unsigned int
audit_get_sessionid(struct task_struct *tsk) return tsk->sessionid;
}
+static inline u64 audit_get_containerid(struct task_struct *tsk)
+{
+ return tsk->containerid;
+}
+
extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid,
gid_t gid, umode_t mode); extern void __audit_bprm(struct
audit_get_sessionid(struct task_struct *tsk) {
return -1;
}
+static inline kuid_t audit_get_containerid(struct task_struct *tsk)
+{
+ return INVALID_CID;
+}
static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
{ }
static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t
audit_loginuid_set(struct task_struct *tsk) return
uid_valid(audit_get_loginuid(tsk)); }
+static inline bool audit_containerid_set(struct task_struct *tsk)
+{
+ return audit_get_containerid(tsk) != INVALID_CID;
+}
+
static inline void audit_log_string(struct audit_buffer *ab, const
char *buf) {
audit_log_n_string(ab, buf, strlen(buf));
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 6a53262..046bd0a 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -18,6 +18,7 @@
#include <linux/sched/rt.h>
#include <linux/livepatch.h>
#include <linux/mm_types.h>
+#include <linux/audit.h>
#include <asm/thread_info.h>
@@ -120,7 +121,8 @@
#ifdef CONFIG_AUDITSYSCALL
#define INIT_IDS \
.loginuid = INVALID_UID, \
- .sessionid = (unsigned int)-1,
+ .sessionid = (unsigned int)-1, \
+ .containerid = INVALID_CID,
#else
#define INIT_IDS
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d258826..1b82191 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -796,6 +796,7 @@ struct task_struct {
#ifdef CONFIG_AUDITSYSCALL
kuid_t loginuid;
unsigned int sessionid;
+ u64 containerid;
#endif
struct seccomp seccomp;
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 4e61a9e..921a71f 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -71,6 +71,7 @@
#define AUDIT_TTY_SET 1017 /* Set TTY auditing
status */ #define AUDIT_SET_FEATURE 1018 /* Turn an
audit feature on or off */ #define AUDIT_GET_FEATURE
1019 /* Get which features are enabled */ +#define
AUDIT_CONTAINER 1020 /* Define the container id
and information */ #define AUDIT_FIRST_USER_MSG 1100 /*
Userspace messages mostly uninteresting to kernel */ #define
AUDIT_USER_AVC 1107 /* We filter this
#define AUDIT_UID_UNSET (unsigned int)-1
+#define AUDIT_CID_UNSET ((u64)-1)
/* audit_rule_data supports filter rules with both integer and string
* fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4e0a4ac..29c8482 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2073,6 +2073,90 @@ int audit_set_loginuid(kuid_t loginuid)
return rc;
}
+static int audit_set_containerid_perm(struct task_struct *task, u64
containerid) +{
+ struct task_struct *parent;
+ u64 pcontainerid, ccontainerid;
+
+ /* Don't allow to set our own containerid */
+ if (current == task)
+ return -EPERM;
+ /* Don't allow the containerid to be unset */
+ if (!cid_valid(containerid))
+ return -EINVAL;
+ /* if we don't have caps, reject */
+ if (!capable(CAP_AUDIT_CONTROL))
+ return -EPERM;
+ /* if containerid is unset, allow */
+ if (!audit_containerid_set(task))
+ return 0;
+ /* it is already set, and not inherited from the parent,
reject */
+ ccontainerid = audit_get_containerid(task);
+ rcu_read_lock();
+ parent = rcu_dereference(task->real_parent);
+ rcu_read_unlock();
+ task_lock(parent);
+ pcontainerid = audit_get_containerid(parent);
+ task_unlock(parent);
+ if (ccontainerid != pcontainerid)
+ return -EPERM;
+ return 0;
+}
+
+static void audit_log_set_containerid(struct task_struct *task, u64 oldcontainerid,
+ u64 containerid, int rc)
+{
+ struct audit_buffer *ab;
+ uid_t uid;
+ struct tty_struct *tty;
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONTAINER);
+ if (!ab)
+ return;
+
+ uid = from_kuid(&init_user_ns, task_uid(current));
+ tty = audit_get_tty(current);
+
+ audit_log_format(ab, "op=set pid=%d uid=%u",
task_tgid_nr(current), uid);
+ audit_log_task_context(ab);
+ audit_log_format(ab, " auid=%u tty=%s ses=%u opid=%d
old-contid=%llu contid=%llu res=%d",
The preferred ordering would be: op, opid, old-contid, contid, pid, uid,
tty, ses, subj, comm, exe, res. This groups the searchable fields
together using the most common ordering so that parsing is simple.
There has been a suggestion to make this a syscall-connected record, and
if that is the case, we'd simply drop all the fields that would be
duplicated by the syscall record. Otherwise, I'll use your suggested
order.

As you may recall this suggestion was also made for the AUDIT_LOGIN
record.
Thanks,
-Steve
Post by Richard Guy Briggs
+ from_kuid(&init_user_ns,
audit_get_loginuid(current)),
+ tty ? tty_name(tty) : "(none)",
audit_get_sessionid(current),
+ task_tgid_nr(task), oldcontainerid,
containerid, !rc); +
+ audit_put_tty(tty);
+ audit_log_end(ab);
+}
+
+/**
+ * audit_set_containerid - set current task's audit_context
containerid
+ *
+ * Returns 0 on success, -EPERM on permission failure.
+ *
+ * Called (set) from fs/proc/base.c::proc_containerid_write().
+ */
+int audit_set_containerid(struct task_struct *task, u64 containerid)
+{
+ u64 oldcontainerid;
+ int rc;
+
+ oldcontainerid = audit_get_containerid(task);
+
+ rc = audit_set_containerid_perm(task, containerid);
+ if (!rc) {
+ task_lock(task);
+ task->containerid = containerid;
+ task_unlock(task);
+ }
+
+ audit_log_set_containerid(task, oldcontainerid, containerid, rc);
+ return rc;
+}
+
/**
* __audit_mq_open - record audit data for a POSIX MQ open
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Richard Guy Briggs
2018-05-18 15:21:06 UTC
Permalink
Raw Message
On Thu, 17 May 2018 17:56:00 -0400
Post by Richard Guy Briggs
Post by Steve Grubb
During syscall events, the path info is returned in a record
simply called AUDIT_PATH, cwd info is returned in AUDIT_CWD. So,
rather than calling the record that gets attached to everything
AUDIT_CONTAINER_INFO, how about simply AUDIT_CONTAINER.
Considering the container initiation record is different than the
record to document the container involved in an otherwise normal
syscall, we need two names. I don't have a strong opinion what they
are.
I'd prefer AUDIT_CONTAIN and AUDIT_CONTAINER_INFO so that the two
are different enough to be visually distinct while leaving
AUDIT_CONTAINERID for the field type in patch 4 ("audit: add
containerid filtering")
(Sorry, I had intended AUDIT_CONTAINER for the first in that paragraph
above.)
How about AUDIT_CONTAINER for the auxiliary record? The one that starts
the container, I don't have a strong opinion on. Could be
AUDIT_CONTAINER_INIT, AUDIT_CONTAINER_START, AUDIT_CONTAINERID,
AUDIT_CONTAINER_ID, or something else. The API call that sets the ID
for filtering could be AUDIT_CID or AUDIT_CONTID if that helps decide
what the initial event might be. Normally, it should match the field
being filtered.
Ok, I had shortened the record field name to "contid=" to be unique
enough while not using too much netlink bandwidth. I could have used
"cid=" but that could be unobvious or ambiguous. I didn't want to use
the full "containerid=" due to that. I suppose I could change the
field name macro to AUDIT_CONTID.

For the one that starts the container, I'd prefer to leave the name a
bit more general than "_INIT", "_START", so maybe I'll swap them around
and use AUDIT_CONTAINER_INFO for the startup record, and use
AUDIT_CONTAINER for the syscall auxiliary record.

Does that work?
-Steve
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Steve Grubb
2018-05-18 15:38:25 UTC
Permalink
Raw Message
On Fri, 18 May 2018 11:21:06 -0400
Post by Richard Guy Briggs
On Thu, 17 May 2018 17:56:00 -0400
Post by Richard Guy Briggs
Post by Steve Grubb
During syscall events, the path info is returned in a record
simply called AUDIT_PATH, cwd info is returned in AUDIT_CWD. So,
rather than calling the record that gets attached to everything
AUDIT_CONTAINER_INFO, how about simply AUDIT_CONTAINER.
Considering the container initiation record is different than the
record to document the container involved in an otherwise normal
syscall, we need two names. I don't have a strong opinion what
they are.
I'd prefer AUDIT_CONTAIN and AUDIT_CONTAINER_INFO so that the two
are different enough to be visually distinct while leaving
AUDIT_CONTAINERID for the field type in patch 4 ("audit: add
containerid filtering")
(Sorry, I had intended AUDIT_CONTAINER for the first in that paragraph
above.)
How about AUDIT_CONTAINER for the auxiliary record? The one that
starts the container, I don't have a strong opinion on. Could be
AUDIT_CONTAINER_INIT, AUDIT_CONTAINER_START, AUDIT_CONTAINERID,
AUDIT_CONTAINER_ID, or something else. The API call that sets the ID
for filtering could be AUDIT_CID or AUDIT_CONTID if that helps
decide what the initial event might be. Normally, it should match
the field being filtered.
Ok, I had shortened the record field name to "contid=" to be unique
enough while not using too much netlink bandwidth. I could have used
"cid=" but that could be unobvious or ambiguous. I didn't want to use
the full "containerid=" due to that. I suppose I could change the
field name macro to AUDIT_CONTID.
For the one that starts the container, I'd prefer to leave the name a
bit more general than "_INIT", "_START", so maybe I'll swap them
around and use AUDIT_CONTAINER_INFO for the startup record, and use
AUDIT_CONTAINER for the syscall auxiliary record.
Does that work?
I'll go along with that. Thanks. But making that swap frees up
AUDIT_CONTAINER_ID which could be the first event. But
AUDIT_CONTAINER_INFO is also fine with me.

Best Regards,
-Steve
Richard Guy Briggs
2018-06-01 21:04:55 UTC
Permalink
Raw Message
On Fri, 16 Mar 2018 05:00:28 -0400
Post by Richard Guy Briggs
Implement the proc fs write to set the audit container ID of a
process, emitting an AUDIT_CONTAINER record to document the event.
This is a write from the container orchestrator task to a proc entry
of the form /proc/PID/containerid where PID is the process ID of the
newly created task that is to become the first task in a container,
or an additional task added to a container.
The write expects up to a u64 value (unset: 18446744073709551615).
type=CONTAINER msg=audit(1519903238.968:261): op=set pid=596 uid=0
subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 auid=0
tty=pts0 ses=1 opid=596 old-contid=18446744073709551615 contid=123455
res=0
There was one thing I was wondering about. Currently when we set the
loginuid, the record is AUDIT_LOGINUID. The corollary is that when we
set the container id, the event should be AUDIT_CONTAINERID or
AUDIT_CONTAINER_ID.
During syscall events, the path info is returned in a record simply
called AUDIT_PATH, cwd info is returned in AUDIT_CWD. So, rather than
calling the record that gets attached to everything
AUDIT_CONTAINER_INFO, how about simply AUDIT_CONTAINER.
Post by Richard Guy Briggs
The "op" field indicates an initial set. The "pid" to "ses" fields
are the orchestrator while the "opid" field is the object's PID, the
process being "contained". Old and new container ID values are given
in the "contid" fields, while res indicates its success.
It is not permitted to self-set, unset or re-set the container ID. A
child inherits its parent's container ID, but then can be set only
once after.
See: https://github.com/linux-audit/audit-kernel/issues/32
---
fs/proc/base.c | 37 ++++++++++++++++++++
include/linux/audit.h | 16 +++++++++
include/linux/init_task.h | 4 ++-
include/linux/sched.h | 1 +
include/uapi/linux/audit.h | 2 ++
kernel/auditsc.c | 84
++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 143
insertions(+), 1 deletion(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 60316b5..6ce4fbe 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1299,6 +1299,41 @@ static ssize_t proc_sessionid_read(struct file
* file, char __user * buf, .read = proc_sessionid_read,
.llseek = generic_file_llseek,
};
+
+static ssize_t proc_containerid_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode *inode = file_inode(file);
+ u64 containerid;
+ int rv;
+ struct task_struct *task = get_proc_task(inode);
+
+ if (!task)
+ return -ESRCH;
+ if (*ppos != 0) {
+ /* No partial writes. */
+ put_task_struct(task);
+ return -EINVAL;
+ }
+
+ rv = kstrtou64_from_user(buf, count, 10, &containerid);
+ if (rv < 0) {
+ put_task_struct(task);
+ return rv;
+ }
+
+ rv = audit_set_containerid(task, containerid);
+ put_task_struct(task);
+ if (rv < 0)
+ return rv;
+ return count;
+}
+
+static const struct file_operations proc_containerid_operations = {
+ .write = proc_containerid_write,
+ .llseek = generic_file_llseek,
+};
+
#endif
#ifdef CONFIG_FAULT_INJECTION
@@ -2961,6 +2996,7 @@ static int proc_pid_patch_state(struct seq_file
*m, struct pid_namespace *ns, #ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR,
proc_tid_comm_permission(struct inode *inode, int mask) #ifdef
CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO,
proc_loginuid_operations), REG("sessionid", S_IRUGO,
proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR,
proc_fault_inject_operations), diff --git a/include/linux/audit.h
b/include/linux/audit.h index af410d9..fe4ba3f 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -29,6 +29,7 @@
#define AUDIT_INO_UNSET ((unsigned long)-1)
#define AUDIT_DEV_UNSET ((dev_t)-1)
+#define INVALID_CID AUDIT_CID_UNSET
struct audit_sig_info {
uid_t uid;
@@ -321,6 +322,7 @@ static inline void audit_ptrace(struct
task_struct *t) extern int auditsc_get_stamp(struct audit_context
*ctx, struct timespec64 *t, unsigned int *serial);
extern int audit_set_loginuid(kuid_t loginuid);
+extern int audit_set_containerid(struct task_struct *tsk, u64 containerid);
static inline kuid_t audit_get_loginuid(struct task_struct *tsk)
{
@@ -332,6 +334,11 @@ static inline unsigned int
audit_get_sessionid(struct task_struct *tsk) return tsk->sessionid;
}
+static inline u64 audit_get_containerid(struct task_struct *tsk)
+{
+ return tsk->containerid;
+}
+
extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid,
gid_t gid, umode_t mode); extern void __audit_bprm(struct
audit_get_sessionid(struct task_struct *tsk) {
return -1;
}
+static inline kuid_t audit_get_containerid(struct task_struct *tsk)
+{
+ return INVALID_CID;
+}
static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
{ }
static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t
audit_loginuid_set(struct task_struct *tsk) return
uid_valid(audit_get_loginuid(tsk)); }
+static inline bool audit_containerid_set(struct task_struct *tsk)
+{
+ return audit_get_containerid(tsk) != INVALID_CID;
+}
+
static inline void audit_log_string(struct audit_buffer *ab, const
char *buf) {
audit_log_n_string(ab, buf, strlen(buf));
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 6a53262..046bd0a 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -18,6 +18,7 @@
#include <linux/sched/rt.h>
#include <linux/livepatch.h>
#include <linux/mm_types.h>
+#include <linux/audit.h>
#include <asm/thread_info.h>
@@ -120,7 +121,8 @@
#ifdef CONFIG_AUDITSYSCALL
#define INIT_IDS \
.loginuid = INVALID_UID, \
- .sessionid = (unsigned int)-1,
+ .sessionid = (unsigned int)-1, \
+ .containerid = INVALID_CID,
#else
#define INIT_IDS
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d258826..1b82191 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -796,6 +796,7 @@ struct task_struct {
#ifdef CONFIG_AUDITSYSCALL
kuid_t loginuid;
unsigned int sessionid;
+ u64 containerid;
#endif
struct seccomp seccomp;
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 4e61a9e..921a71f 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -71,6 +71,7 @@
#define AUDIT_TTY_SET 1017 /* Set TTY auditing
status */ #define AUDIT_SET_FEATURE 1018 /* Turn an
audit feature on or off */ #define AUDIT_GET_FEATURE
1019 /* Get which features are enabled */ +#define
AUDIT_CONTAINER 1020 /* Define the container id
and information */ #define AUDIT_FIRST_USER_MSG 1100 /*
Userspace messages mostly uninteresting to kernel */ #define
AUDIT_USER_AVC 1107 /* We filter this
#define AUDIT_UID_UNSET (unsigned int)-1
+#define AUDIT_CID_UNSET ((u64)-1)
/* audit_rule_data supports filter rules with both integer and string
* fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4e0a4ac..29c8482 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2073,6 +2073,90 @@ int audit_set_loginuid(kuid_t loginuid)
return rc;
}
+static int audit_set_containerid_perm(struct task_struct *task, u64
containerid) +{
+ struct task_struct *parent;
+ u64 pcontainerid, ccontainerid;
+
+ /* Don't allow to set our own containerid */
+ if (current == task)
+ return -EPERM;
+ /* Don't allow the containerid to be unset */
+ if (!cid_valid(containerid))
+ return -EINVAL;
+ /* if we don't have caps, reject */
+ if (!capable(CAP_AUDIT_CONTROL))
+ return -EPERM;
+ /* if containerid is unset, allow */
+ if (!audit_containerid_set(task))
+ return 0;
+ /* it is already set, and not inherited from the parent,
reject */
+ ccontainerid = audit_get_containerid(task);
+ rcu_read_lock();
+ parent = rcu_dereference(task->real_parent);
+ rcu_read_unlock();
+ task_lock(parent);
+ pcontainerid = audit_get_containerid(parent);
+ task_unlock(parent);
+ if (ccontainerid != pcontainerid)
+ return -EPERM;
+ return 0;
+}
+
+static void audit_log_set_containerid(struct task_struct *task, u64 oldcontainerid,
+ u64 containerid, int rc)
+{
+ struct audit_buffer *ab;
+ uid_t uid;
+ struct tty_struct *tty;
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONTAINER);
+ if (!ab)
+ return;
+
+ uid = from_kuid(&init_user_ns, task_uid(current));
+ tty = audit_get_tty(current);
+
+ audit_log_format(ab, "op=set pid=%d uid=%u",
task_tgid_nr(current), uid);
+ audit_log_task_context(ab);
+ audit_log_format(ab, " auid=%u tty=%s ses=%u opid=%d
old-contid=%llu contid=%llu res=%d",
The preferred ordering would be: op, opid, old-contid, contid, pid, uid,
tty, ses, subj, comm, exe, res. This groups the searchable fields
together using the most common ordering so that parsing is simple.
Where would you like auid? It appears that just before uid would be the
right place, if not in place of uid, but this is just a guess since it
isn't consistent.
-Steve
Post by Richard Guy Briggs
+ from_kuid(&init_user_ns,
audit_get_loginuid(current)),
+ tty ? tty_name(tty) : "(none)",
audit_get_sessionid(current),
+ task_tgid_nr(task), oldcontainerid,
containerid, !rc); +
+ audit_put_tty(tty);
+ audit_log_end(ab);
+}
+
+/**
+ * audit_set_containerid - set current task's audit_context
containerid
+ *
+ * Returns 0 on success, -EPERM on permission failure.
+ *
+ * Called (set) from fs/proc/base.c::proc_containerid_write().
+ */
+int audit_set_containerid(struct task_struct *task, u64 containerid)
+{
+ u64 oldcontainerid;
+ int rc;
+
+ oldcontainerid = audit_get_containerid(task);
+
+ rc = audit_set_containerid_perm(task, containerid);
+ if (!rc) {
+ task_lock(task);
+ task->containerid = containerid;
+ task_unlock(task);
+ }
+
+ audit_log_set_containerid(task, oldcontainerid, containerid, rc);
+ return rc;
+}
+
/**
* __audit_mq_open - record audit data for a POSIX MQ open
--
Linux-audit mailing list
https://www.redhat.com/mailman/listinfo/linux-audit
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Steve Grubb
2018-06-04 16:09:20 UTC
Permalink
Raw Message
Re: [RFC PATCH ghak32 V2 01/13] audit: add container id
To: Me
5:04 PM
On Fri, 16 Mar 2018 05:00:28 -0400
Post by Richard Guy Briggs
Implement the proc fs write to set the audit container ID of a
process, emitting an AUDIT_CONTAINER record to document the event.
This is a write from the container orchestrator task to a proc entry
of the form /proc/PID/containerid where PID is the process ID of the
newly created task that is to become the first task in a container,
or an additional task added to a container.
The write expects up to a u64 value (unset: 18446744073709551615).
type=CONTAINER msg=audit(1519903238.968:261): op=set pid=596 uid=0
subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 auid=0
tty=pts0 ses=1 opid=596 old-contid=18446744073709551615 contid=123455
res=0
There was one thing I was wondering about. Currently when we set the
loginuid, the record is AUDIT_LOGINUID. The corollary is that when we
set the container id, the event should be AUDIT_CONTAINERID or
AUDIT_CONTAINER_ID.
During syscall events, the path info is returned in a record simply
called AUDIT_PATH, cwd info is returned in AUDIT_CWD. So, rather than
calling the record that gets attached to everything
AUDIT_CONTAINER_INFO, how about simply AUDIT_CONTAINER.
Post by Richard Guy Briggs
The "op" field indicates an initial set. The "pid" to "ses" fields
are the orchestrator while the "opid" field is the object's PID, the
process being "contained". Old and new container ID values are given
in the "contid" fields, while res indicates its success.
It is not permitted to self-set, unset or re-set the container ID. A
child inherits its parent's container ID, but then can be set only
once after.
See: https://github.com/linux-audit/audit-kernel/issues/32
---
fs/proc/base.c | 37 ++++++++++++++++++++
include/linux/audit.h | 16 +++++++++
include/linux/init_task.h | 4 ++-
include/linux/sched.h | 1 +
include/uapi/linux/audit.h | 2 ++
kernel/auditsc.c | 84
++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 143
insertions(+), 1 deletion(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 60316b5..6ce4fbe 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1299,6 +1299,41 @@ static ssize_t proc_sessionid_read(struct file
* file, char __user * buf, .read = proc_sessionid_read,
.llseek = generic_file_llseek,
};
+
+static ssize_t proc_containerid_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode *inode = file_inode(file);
+ u64 containerid;
+ int rv;
+ struct task_struct *task = get_proc_task(inode);
+
+ if (!task)
+ return -ESRCH;
+ if (*ppos != 0) {
+ /* No partial writes. */
+ put_task_struct(task);
+ return -EINVAL;
+ }
+
+ rv = kstrtou64_from_user(buf, count, 10, &containerid);
+ if (rv < 0) {
+ put_task_struct(task);
+ return rv;
+ }
+
+ rv = audit_set_containerid(task, containerid);
+ put_task_struct(task);
+ if (rv < 0)
+ return rv;
+ return count;
+}
+
+static const struct file_operations proc_containerid_operations = {
+ .write = proc_containerid_write,
+ .llseek = generic_file_llseek,
+};
+
#endif
#ifdef CONFIG_FAULT_INJECTION
@@ -2961,6 +2996,7 @@ static int proc_pid_patch_state(struct seq_file
*m, struct pid_namespace *ns, #ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR,
proc_tid_comm_permission(struct inode *inode, int mask) #ifdef
CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO,
proc_loginuid_operations), REG("sessionid", S_IRUGO,
proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR,
proc_fault_inject_operations), diff --git a/include/linux/audit.h
b/include/linux/audit.h index af410d9..fe4ba3f 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -29,6 +29,7 @@
#define AUDIT_INO_UNSET ((unsigned long)-1)
#define AUDIT_DEV_UNSET ((dev_t)-1)
+#define INVALID_CID AUDIT_CID_UNSET
struct audit_sig_info {
uid_t uid;
@@ -321,6 +322,7 @@ static inline void audit_ptrace(struct
task_struct *t) extern int auditsc_get_stamp(struct audit_context
*ctx, struct timespec64 *t, unsigned int *serial);
extern int audit_set_loginuid(kuid_t loginuid);
+extern int audit_set_containerid(struct task_struct *tsk, u64 containerid);
static inline kuid_t audit_get_loginuid(struct task_struct *tsk)
{
@@ -332,6 +334,11 @@ static inline unsigned int
audit_get_sessionid(struct task_struct *tsk) return tsk->sessionid;
}
+static inline u64 audit_get_containerid(struct task_struct *tsk)
+{
+ return tsk->containerid;
+}
+
extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid,
gid_t gid, umode_t mode); extern void __audit_bprm(struct
audit_get_sessionid(struct task_struct *tsk) {
return -1;
}
+static inline kuid_t audit_get_containerid(struct task_struct *tsk)
+{
+ return INVALID_CID;
+}
static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
{ }
static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t
audit_loginuid_set(struct task_struct *tsk) return
uid_valid(audit_get_loginuid(tsk)); }
+static inline bool audit_containerid_set(struct task_struct *tsk)
+{
+ return audit_get_containerid(tsk) != INVALID_CID;
+}
+
static inline void audit_log_string(struct audit_buffer *ab, const
char *buf) {
audit_log_n_string(ab, buf, strlen(buf));
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 6a53262..046bd0a 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -18,6 +18,7 @@
#include <linux/sched/rt.h>
#include <linux/livepatch.h>
#include <linux/mm_types.h>
+#include <linux/audit.h>
#include <asm/thread_info.h>
@@ -120,7 +121,8 @@
#ifdef CONFIG_AUDITSYSCALL
#define INIT_IDS \
.loginuid = INVALID_UID, \
- .sessionid = (unsigned int)-1,
+ .sessionid = (unsigned int)-1, \
+ .containerid = INVALID_CID,
#else
#define INIT_IDS
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d258826..1b82191 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -796,6 +796,7 @@ struct task_struct {
#ifdef CONFIG_AUDITSYSCALL
kuid_t loginuid;
unsigned int sessionid;
+ u64 containerid;
#endif
struct seccomp seccomp;
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 4e61a9e..921a71f 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -71,6 +71,7 @@
#define AUDIT_TTY_SET 1017 /* Set TTY auditing
status */ #define AUDIT_SET_FEATURE 1018 /* Turn an
audit feature on or off */ #define AUDIT_GET_FEATURE
1019 /* Get which features are enabled */ +#define
AUDIT_CONTAINER 1020 /* Define the container id
and information */ #define AUDIT_FIRST_USER_MSG 1100 /*
Userspace messages mostly uninteresting to kernel */ #define
AUDIT_USER_AVC 1107 /* We filter this
#define AUDIT_UID_UNSET (unsigned int)-1
+#define AUDIT_CID_UNSET ((u64)-1)
/* audit_rule_data supports filter rules with both integer and string
* fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4e0a4ac..29c8482 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2073,6 +2073,90 @@ int audit_set_loginuid(kuid_t loginuid)
return rc;
}
+static int audit_set_containerid_perm(struct task_struct *task, u64
containerid) +{
+ struct task_struct *parent;
+ u64 pcontainerid, ccontainerid;
+
+ /* Don't allow to set our own containerid */
+ if (current == task)
+ return -EPERM;
+ /* Don't allow the containerid to be unset */
+ if (!cid_valid(containerid))
+ return -EINVAL;
+ /* if we don't have caps, reject */
+ if (!capable(CAP_AUDIT_CONTROL))
+ return -EPERM;
+ /* if containerid is unset, allow */
+ if (!audit_containerid_set(task))
+ return 0;
+ /* it is already set, and not inherited from the parent,
reject */
+ ccontainerid = audit_get_containerid(task);
+ rcu_read_lock();
+ parent = rcu_dereference(task->real_parent);
+ rcu_read_unlock();
+ task_lock(parent);
+ pcontainerid = audit_get_containerid(parent);
+ task_unlock(parent);
+ if (ccontainerid != pcontainerid)
+ return -EPERM;
+ return 0;
+}
+
+static void audit_log_set_containerid(struct task_struct *task, u64
oldcontainerid,
+ u64 containerid, int rc)
+{
+ struct audit_buffer *ab;
+ uid_t uid;
+ struct tty_struct *tty;
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONTAINER);
+ if (!ab)
+ return;
+
+ uid = from_kuid(&init_user_ns, task_uid(current));
+ tty = audit_get_tty(current);
+
+ audit_log_format(ab, "op=set pid=%d uid=%u",
task_tgid_nr(current), uid);
+ audit_log_task_context(ab);
+ audit_log_format(ab, " auid=%u tty=%s ses=%u opid=%d
old-contid=%llu contid=%llu res=%d",
The preferred ordering would be: op, opid, old-contid, contid, pid, uid,
tty, ses, subj, comm, exe, res. This groups the searchable fields
together using the most common ordering so that parsing is simple.
Where would you like auid? It appears that just before uid would be the
right place, if not in place of uid, but this is just a guess since it
isn't consistent.
Just after the uid is the proper place. The most common sequence is:
pid, uid, auid, tty, session, subject context, comm, exe.

Thanks,
-Steve
Richard Guy Briggs
2018-06-04 20:23:01 UTC
Permalink
Raw Message
Post by Steve Grubb
Re: [RFC PATCH ghak32 V2 01/13] audit: add container id
To: Me
5:04 PM
On Fri, 16 Mar 2018 05:00:28 -0400
Post by Richard Guy Briggs
Implement the proc fs write to set the audit container ID of a
process, emitting an AUDIT_CONTAINER record to document the event.
This is a write from the container orchestrator task to a proc entry
of the form /proc/PID/containerid where PID is the process ID of the
newly created task that is to become the first task in a container,
or an additional task added to a container.
The write expects up to a u64 value (unset: 18446744073709551615).
type=CONTAINER msg=audit(1519903238.968:261): op=set pid=596 uid=0
subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 auid=0
tty=pts0 ses=1 opid=596 old-contid=18446744073709551615 contid=123455
res=0
There was one thing I was wondering about. Currently when we set the
loginuid, the record is AUDIT_LOGINUID. The corollary is that when we
set the container id, the event should be AUDIT_CONTAINERID or
AUDIT_CONTAINER_ID.
During syscall events, the path info is returned in a record simply
called AUDIT_PATH, cwd info is returned in AUDIT_CWD. So, rather than
calling the record that gets attached to everything
AUDIT_CONTAINER_INFO, how about simply AUDIT_CONTAINER?
Post by Richard Guy Briggs
The "op" field indicates an initial set. The "pid" to "ses" fields
are the orchestrator while the "opid" field is the object's PID, the
process being "contained". Old and new container ID values are given
in the "contid" fields, while res indicates its success.
It is not permitted to self-set, unset or re-set the container ID. A
child inherits its parent's container ID, but then can be set only
once after.
See: https://github.com/linux-audit/audit-kernel/issues/32
---
fs/proc/base.c | 37 ++++++++++++++++++++
include/linux/audit.h | 16 +++++++++
include/linux/init_task.h | 4 ++-
include/linux/sched.h | 1 +
include/uapi/linux/audit.h | 2 ++
kernel/auditsc.c | 84
++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 143
insertions(+), 1 deletion(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 60316b5..6ce4fbe 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1299,6 +1299,41 @@ static ssize_t proc_sessionid_read(struct file
* file, char __user * buf, .read = proc_sessionid_read,
.llseek = generic_file_llseek,
};
+
+static ssize_t proc_containerid_write(struct file *file, const char
__user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode *inode = file_inode(file);
+ u64 containerid;
+ int rv;
+ struct task_struct *task = get_proc_task(inode);
+
+ if (!task)
+ return -ESRCH;
+ if (*ppos != 0) {
+ /* No partial writes. */
+ put_task_struct(task);
+ return -EINVAL;
+ }
+
+ rv = kstrtou64_from_user(buf, count, 10, &containerid);
+ if (rv < 0) {
+ put_task_struct(task);
+ return rv;
+ }
+
+ rv = audit_set_containerid(task, containerid);
+ put_task_struct(task);
+ if (rv < 0)
+ return rv;
+ return count;
+}
+
+static const struct file_operations proc_containerid_operations = {
+ .write = proc_containerid_write,
+ .llseek = generic_file_llseek,
+};
+
#endif
#ifdef CONFIG_FAULT_INJECTION
@@ -2961,6 +2996,7 @@ static int proc_pid_patch_state(struct seq_file
*m, struct pid_namespace *ns, #ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR,
proc_tid_comm_permission(struct inode *inode, int mask) #ifdef
CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO,
proc_loginuid_operations), REG("sessionid", S_IRUGO,
proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR,
proc_fault_inject_operations), diff --git a/include/linux/audit.h
b/include/linux/audit.h index af410d9..fe4ba3f 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -29,6 +29,7 @@
#define AUDIT_INO_UNSET ((unsigned long)-1)
#define AUDIT_DEV_UNSET ((dev_t)-1)
+#define INVALID_CID AUDIT_CID_UNSET
struct audit_sig_info {
uid_t uid;
@@ -321,6 +322,7 @@ static inline void audit_ptrace(struct
task_struct *t) extern int auditsc_get_stamp(struct audit_context
*ctx, struct timespec64 *t, unsigned int *serial);
extern int audit_set_loginuid(kuid_t loginuid);
+extern int audit_set_containerid(struct task_struct *tsk, u64 containerid);
static inline kuid_t audit_get_loginuid(struct task_struct *tsk)
{
@@ -332,6 +334,11 @@ static inline unsigned int
audit_get_sessionid(struct task_struct *tsk) return tsk->sessionid;
}
+static inline u64 audit_get_containerid(struct task_struct *tsk)
+{
+ return tsk->containerid;
+}
+
extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid,
gid_t gid, umode_t mode); extern void __audit_bprm(struct
audit_get_sessionid(struct task_struct *tsk) {
return -1;
}
+static inline kuid_t audit_get_containerid(struct task_struct *tsk)
+{
+ return INVALID_CID;
+}
static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
{ }
static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t
audit_loginuid_set(struct task_struct *tsk) return
uid_valid(audit_get_loginuid(tsk)); }
+static inline bool audit_containerid_set(struct task_struct *tsk)
+{
+ return audit_get_containerid(tsk) != INVALID_CID;
+}
+
static inline void audit_log_string(struct audit_buffer *ab, const
char *buf) {
audit_log_n_string(ab, buf, strlen(buf));
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 6a53262..046bd0a 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -18,6 +18,7 @@
#include <linux/sched/rt.h>
#include <linux/livepatch.h>
#include <linux/mm_types.h>
+#include <linux/audit.h>
#include <asm/thread_info.h>
@@ -120,7 +121,8 @@
#ifdef CONFIG_AUDITSYSCALL
#define INIT_IDS \
.loginuid = INVALID_UID, \
- .sessionid = (unsigned int)-1,
+ .sessionid = (unsigned int)-1, \
+ .containerid = INVALID_CID,
#else
#define INIT_IDS
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d258826..1b82191 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -796,6 +796,7 @@ struct task_struct {
#ifdef CONFIG_AUDITSYSCALL
kuid_t loginuid;
unsigned int sessionid;
+ u64 containerid;
#endif
struct seccomp seccomp;
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 4e61a9e..921a71f 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -71,6 +71,7 @@
#define AUDIT_TTY_SET 1017 /* Set TTY auditing
status */ #define AUDIT_SET_FEATURE 1018 /* Turn an
audit feature on or off */ #define AUDIT_GET_FEATURE
1019 /* Get which features are enabled */ +#define
AUDIT_CONTAINER 1020 /* Define the container id
and information */ #define AUDIT_FIRST_USER_MSG 1100 /*
Userspace messages mostly uninteresting to kernel */ #define
AUDIT_USER_AVC 1107 /* We filter this
#define AUDIT_UID_UNSET (unsigned int)-1
+#define AUDIT_CID_UNSET ((u64)-1)
/* audit_rule_data supports filter rules with both integer and string
* fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4e0a4ac..29c8482 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2073,6 +2073,90 @@ int audit_set_loginuid(kuid_t loginuid)
return rc;
}
+static int audit_set_containerid_perm(struct task_struct *task, u64
containerid) +{
+ struct task_struct *parent;
+ u64 pcontainerid, ccontainerid;
+
+ /* Don't allow to set our own containerid */
+ if (current == task)
+ return -EPERM;
+ /* Don't allow the containerid to be unset */
+ if (!cid_valid(containerid))
+ return -EINVAL;
+ /* if we don't have caps, reject */
+ if (!capable(CAP_AUDIT_CONTROL))
+ return -EPERM;
+ /* if containerid is unset, allow */
+ if (!audit_containerid_set(task))
+ return 0;
+ /* it is already set, and not inherited from the parent,
reject */
+ ccontainerid = audit_get_containerid(task);
+ rcu_read_lock();
+ parent = rcu_dereference(task->real_parent);
+ rcu_read_unlock();
+ task_lock(parent);
+ pcontainerid = audit_get_containerid(parent);
+ task_unlock(parent);
+ if (ccontainerid != pcontainerid)
+ return -EPERM;
+ return 0;
+}
+
+static void audit_log_set_containerid(struct task_struct *task, u64
oldcontainerid,
+ u64 containerid, int rc)
+{
+ struct audit_buffer *ab;
+ uid_t uid;
+ struct tty_struct *tty;
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONTAINER);
+ if (!ab)
+ return;
+
+ uid = from_kuid(&init_user_ns, task_uid(current));
+ tty = audit_get_tty(current);
+
+ audit_log_format(ab, "op=set pid=%d uid=%u",
task_tgid_nr(current), uid);
+ audit_log_task_context(ab);
+ audit_log_format(ab, " auid=%u tty=%s ses=%u opid=%d
old-contid=%llu contid=%llu res=%d",
The preferred ordering would be: op, opid, old-contid, contid, pid, uid,
tty, ses, subj, comm, exe, res. This groups the searchable fields
together using the most common ordering so that parsing is simple.
Where would you like auid? It appears that just before uid would be the
right place, if not in place of uid, but this is just a guess since it
isn't consistent.
pid, uid, auid, tty, session, subject context, comm, exe.
Not according to:
http://people.redhat.com/sgrubb/audit/record-fields.html
Post by Steve Grubb
-Steve
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Richard Guy Briggs
2018-06-04 20:30:48 UTC
Permalink
Raw Message
Post by Richard Guy Briggs
Post by Steve Grubb
Re: [RFC PATCH ghak32 V2 01/13] audit: add container id
To: Me
5:04 PM
On Fri, 16 Mar 2018 05:00:28 -0400
Post by Richard Guy Briggs
Implement the proc fs write to set the audit container ID of a
process, emitting an AUDIT_CONTAINER record to document the event.
This is a write from the container orchestrator task to a proc entry
of the form /proc/PID/containerid where PID is the process ID of the
newly created task that is to become the first task in a container,
or an additional task added to a container.
The write expects up to a u64 value (unset: 18446744073709551615).
type=CONTAINER msg=audit(1519903238.968:261): op=set pid=596 uid=0
subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 auid=0
tty=pts0 ses=1 opid=596 old-contid=18446744073709551615 contid=123455
res=0
There was one thing I was wondering about. Currently when we set the
loginuid, the record is AUDIT_LOGINUID. The corollary is that when we
set the container id, the event should be AUDIT_CONTAINERID or
AUDIT_CONTAINER_ID.
During syscall events, the path info is returned in a record simply
called AUDIT_PATH, cwd info is returned in AUDIT_CWD. So, rather than
calling the record that gets attached to everything
AUDIT_CONTAINER_INFO, how about simply AUDIT_CONTAINER?
Post by Richard Guy Briggs
The "op" field indicates an initial set. The "pid" to "ses" fields
are the orchestrator while the "opid" field is the object's PID, the
process being "contained". Old and new container ID values are given
in the "contid" fields, while res indicates its success.
It is not permitted to self-set, unset or re-set the container ID. A
child inherits its parent's container ID, but then can be set only
once after.
See: https://github.com/linux-audit/audit-kernel/issues/32
---
fs/proc/base.c | 37 ++++++++++++++++++++
include/linux/audit.h | 16 +++++++++
include/linux/init_task.h | 4 ++-
include/linux/sched.h | 1 +
include/uapi/linux/audit.h | 2 ++
kernel/auditsc.c | 84
++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 143
insertions(+), 1 deletion(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 60316b5..6ce4fbe 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1299,6 +1299,41 @@ static ssize_t proc_sessionid_read(struct file
* file, char __user * buf, .read = proc_sessionid_read,
.llseek = generic_file_llseek,
};
+
+static ssize_t proc_containerid_write(struct file *file, const char
__user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct inode *inode = file_inode(file);
+ u64 containerid;
+ int rv;
+ struct task_struct *task = get_proc_task(inode);
+
+ if (!task)
+ return -ESRCH;
+ if (*ppos != 0) {
+ /* No partial writes. */
+ put_task_struct(task);
+ return -EINVAL;
+ }
+
+ rv = kstrtou64_from_user(buf, count, 10, &containerid);
+ if (rv < 0) {
+ put_task_struct(task);
+ return rv;
+ }
+
+ rv = audit_set_containerid(task, containerid);
+ put_task_struct(task);
+ if (rv < 0)
+ return rv;
+ return count;
+}
+
+static const struct file_operations proc_containerid_operations = {
+ .write = proc_containerid_write,
+ .llseek = generic_file_llseek,
+};
+
#endif
#ifdef CONFIG_FAULT_INJECTION
@@ -2961,6 +2996,7 @@ static int proc_pid_patch_state(struct seq_file
*m, struct pid_namespace *ns, #ifdef CONFIG_AUDITSYSCALL
REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations),
REG("sessionid", S_IRUGO, proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR,
proc_tid_comm_permission(struct inode *inode, int mask) #ifdef
CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO,
proc_loginuid_operations), REG("sessionid", S_IRUGO,
proc_sessionid_operations),
+ REG("containerid", S_IWUSR, proc_containerid_operations),
#endif
#ifdef CONFIG_FAULT_INJECTION
REG("make-it-fail", S_IRUGO|S_IWUSR,
proc_fault_inject_operations), diff --git a/include/linux/audit.h
b/include/linux/audit.h index af410d9..fe4ba3f 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -29,6 +29,7 @@
#define AUDIT_INO_UNSET ((unsigned long)-1)
#define AUDIT_DEV_UNSET ((dev_t)-1)
+#define INVALID_CID AUDIT_CID_UNSET
struct audit_sig_info {
uid_t uid;
@@ -321,6 +322,7 @@ static inline void audit_ptrace(struct
task_struct *t) extern int auditsc_get_stamp(struct audit_context
*ctx, struct timespec64 *t, unsigned int *serial);
extern int audit_set_loginuid(kuid_t loginuid);
+extern int audit_set_containerid(struct task_struct *tsk, u64
containerid);
static inline kuid_t audit_get_loginuid(struct task_struct *tsk)
{
@@ -332,6 +334,11 @@ static inline unsigned int
audit_get_sessionid(struct task_struct *tsk) return tsk->sessionid;
}
+static inline u64 audit_get_containerid(struct task_struct *tsk)
+{
+ return tsk->containerid;
+}
+
extern void __audit_ipc_obj(struct kern_ipc_perm *ipcp);
extern void __audit_ipc_set_perm(unsigned long qbytes, uid_t uid,
gid_t gid, umode_t mode); extern void __audit_bprm(struct
audit_get_sessionid(struct task_struct *tsk) {
return -1;
}
+static inline kuid_t audit_get_containerid(struct task_struct *tsk)
+{
+ return INVALID_CID;
+}
static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp)
{ }
static inline void audit_ipc_set_perm(unsigned long qbytes, uid_t
audit_loginuid_set(struct task_struct *tsk) return
uid_valid(audit_get_loginuid(tsk)); }
+static inline bool audit_containerid_set(struct task_struct *tsk)
+{
+ return audit_get_containerid(tsk) != INVALID_CID;
+}
+
static inline void audit_log_string(struct audit_buffer *ab, const
char *buf) {
audit_log_n_string(ab, buf, strlen(buf));
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index 6a53262..046bd0a 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -18,6 +18,7 @@
#include <linux/sched/rt.h>
#include <linux/livepatch.h>
#include <linux/mm_types.h>
+#include <linux/audit.h>
#include <asm/thread_info.h>
@@ -120,7 +121,8 @@
#ifdef CONFIG_AUDITSYSCALL
#define INIT_IDS \
.loginuid = INVALID_UID, \
- .sessionid = (unsigned int)-1,
+ .sessionid = (unsigned int)-1, \
+ .containerid = INVALID_CID,
#else
#define INIT_IDS
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d258826..1b82191 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -796,6 +796,7 @@ struct task_struct {
#ifdef CONFIG_AUDITSYSCALL
kuid_t loginuid;
unsigned int sessionid;
+ u64 containerid;
#endif
struct seccomp seccomp;
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 4e61a9e..921a71f 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -71,6 +71,7 @@
#define AUDIT_TTY_SET 1017 /* Set TTY auditing
status */ #define AUDIT_SET_FEATURE 1018 /* Turn an
audit feature on or off */ #define AUDIT_GET_FEATURE
1019 /* Get which features are enabled */ +#define
AUDIT_CONTAINER 1020 /* Define the container id
and information */ #define AUDIT_FIRST_USER_MSG 1100 /*
Userspace messages mostly uninteresting to kernel */ #define
AUDIT_USER_AVC 1107 /* We filter this
#define AUDIT_UID_UNSET (unsigned int)-1
+#define AUDIT_CID_UNSET ((u64)-1)
/* audit_rule_data supports filter rules with both integer and string
* fields. It corresponds with AUDIT_ADD_RULE, AUDIT_DEL_RULE and
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 4e0a4ac..29c8482 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -2073,6 +2073,90 @@ int audit_set_loginuid(kuid_t loginuid)
return rc;
}
+static int audit_set_containerid_perm(struct task_struct *task, u64
containerid) +{
+ struct task_struct *parent;
+ u64 pcontainerid, ccontainerid;
+
+ /* Don't allow to set our own containerid */
+ if (current == task)
+ return -EPERM;
+ /* Don't allow the containerid to be unset */
+ if (!cid_valid(containerid))
+ return -EINVAL;
+ /* if we don't have caps, reject */
+ if (!capable(CAP_AUDIT_CONTROL))
+ return -EPERM;
+ /* if containerid is unset, allow */
+ if (!audit_containerid_set(task))
+ return 0;
+ /* it is already set, and not inherited from the parent,
reject */
+ ccontainerid = audit_get_containerid(task);
+ rcu_read_lock();
+ parent = rcu_dereference(task->real_parent);
+ rcu_read_unlock();
+ task_lock(parent);
+ pcontainerid = audit_get_containerid(parent);
+ task_unlock(parent);
+ if (ccontainerid != pcontainerid)
+ return -EPERM;
+ return 0;
+}
+
+static void audit_log_set_containerid(struct task_struct *task, u64
oldcontainerid,
+ u64 containerid, int rc)
+{
+ struct audit_buffer *ab;
+ uid_t uid;
+ struct tty_struct *tty;
+
+ if (!audit_enabled)
+ return;
+
+ ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_CONTAINER);
+ if (!ab)
+ return;
+
+ uid = from_kuid(&init_user_ns, task_uid(current));
+ tty = audit_get_tty(current);
+
+ audit_log_format(ab, "op=set pid=%d uid=%u",
task_tgid_nr(current), uid);
+ audit_log_task_context(ab);
+ audit_log_format(ab, " auid=%u tty=%s ses=%u opid=%d
old-contid=%llu contid=%llu res=%d",
The preferred ordering would be: op, opid, old-contid, contid, pid, uid,
tty, ses, subj, comm, exe, res. This groups the searchable fields
together using the most common ordering so that parsing is simple.
Where would you like auid? It appears that just before uid would be the
right place, if not in place of uid, but this is just a guess since it
isn't consistent.
pid, uid, auid, tty, session, subject context, comm, exe.
http://people.redhat.com/sgrubb/audit/record-fields.html
Ok, maybe I spoke a bit quickly... Most of the initial ones I saw were
in the reverse order, but there is a clump about 3/4 of the way down
which includes VIRT and USER events.
Post by Richard Guy Briggs
Post by Steve Grubb
-Steve
- RGB
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635

Steve Grubb
2018-05-30 13:20:01 UTC
Permalink
Raw Message
Implement audit kernel container ID.
This patchset is a second RFC based on the proposal document (V3)
https://www.redhat.com/archives/linux-audit/2018-January/msg00014.html
So, if you work on a container orchestrator, how exactly is this set of
interfaces to be used and in what order?

Thanks,
-Steve
The first patch implements the proc fs write to set the audit container
ID of a process, emitting an AUDIT_CONTAINER record to announce the
registration of that container ID on that process. This patch requires
userspace support for record acceptance and proper type display.
The second checks for children or co-threads and refuses to set the
container ID if either are present. (This policy could be changed to
set both with the same container ID provided they meet the rest of the
requirements.)
The third implements the auxiliary record AUDIT_CONTAINER_INFO if a
container ID is identifiable with an event. This patch requires
userspace support for proper type display.
The fourth adds container ID filtering to the exit, exclude and user
lists. This patch requires auditctl userspace support for the
--containerid option.
The 5th adds signal and ptrace support.
The 6th creates a local audit context to be able to bind a standalone
record with a locally created auxiliary record.
The 7th, 8th, 9th, 10th patches add container ID records to standalone
records. Some of these may end up being syscall auxiliary records and
won't need this specific support since they'll be supported via
syscalls.
The 11th adds network namespace container ID labelling based on member
tasks' container ID labels.
The 12th adds container ID support to standalone netfilter records that
don't have a task context and lists each container to which that net
namespace belongs.
The 13th implements reading the container ID from the proc filesystem
for debugging. This patch isn't planned for upstream inclusion.
Feedback please!
sleep 2&
child=$!
echo 123456 > /proc/$child/containerid; echo $?
ausearch -ts recent -m container
echo child:$child contid:$( cat /proc/$child/containerid)
type=CONTAINER msg=audit(1521122590.315:222): op=set pid=689 uid=0
subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 auid=0 tty=pts0
ses=3 opid=707 old-contid=18446744073709551615 contid=123456 res=1
containerid=123459
key=tmpcontainerid
auditctl -a exit,always -F dir=/tmp -F perm=wa -F containerid=$containerid
-F key=$key perl -e "sleep 1; open(my \$tmpfile, '>', \"/tmp/$key\");
close(\$tmpfile);" & child=$!
echo $containerid > /proc/$child/containerid
sleep 2
ausearch -i -ts recent -k $key
auditctl -d exit,always -F dir=/tmp -F perm=wa -F containerid=$containerid
-F key=$key rm -f /tmp/$key
type=CONTAINER_INFO msg=audit(1521122591.614:227): op=task contid=123459
proctitle=7065726C002D6500736C65657020313B206F70656E286D792024746D7066696C
652C20273E272C20222F746D702F746D70636F6E7461696E6572696422293B20636C6F73652
824746D7066696C65293B type=PATH msg=audit(1521122591.614:227): item=1
name="/tmp/tmpcontainerid" inode=18427 dev=00:26 mode=0100644 ouid=0
ogid=0 rdev=00:00 obj=unconfined_u:object_r:user_tmp_t:s0 nametype=CREATE
cap_fp=0000000000000000 cap_fi=0000000000000000 cap_fe=0 cap_fver=0
type=PATH msg=audit(1521122591.614:227): item=0 name="/tmp/" inode=13513
dev=00:26 mode=041777 ouid=0 ogid=0 rdev=00:00
obj=system_u:object_r:tmp_t:s0 nametype=PARENT cap_fp=0000000000000000
cap_fi=0000000000000000 cap_fe=0 cap_fver=0 type=CWD
msg=audit(1521122591.614:227): cwd="/root"
type=SYSCALL msg=audit(1521122591.614:227): arch=c000003e syscall=257
success=yes exit=3 a0=ffffffffffffff9c a1=55db90a28900 a2=241 a3=1b6
items=2 ppid=689 pid=724 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0
sgid=0 fsgid=0 tty=pts0 ses=3 comm="perl" exe="/usr/bin/perl"
subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023
key="tmpcontainerid"
https://github.com/linux-audit/audit-kernel/issues/32
https://github.com/linux-audit/audit-userspace/issues/40
https://github.com/linux-audit/audit-testsuite/issues/64
audit: add container id
audit: check children and threading before allowing containerid
audit: log container info of syscalls
audit: add containerid filtering
audit: add containerid support for ptrace and signals
audit: add support for non-syscall auxiliary records
audit: add container aux record to watch/tree/mark
audit: add containerid support for tty_audit
audit: add containerid support for config/feature/user records
audit: add containerid support for seccomp and anom_abend records
audit: add support for containerid to network namespaces
audit: NETFILTER_PKT: record each container ID associated with a netNS
debug audit: read container ID of a process
drivers/tty/tty_audit.c | 5 +-
fs/proc/base.c | 53 ++++++++++++++++
include/linux/audit.h | 43 +++++++++++++
include/linux/init_task.h | 4 +-
include/linux/sched.h | 1 +
include/net/net_namespace.h | 12 ++++
include/uapi/linux/audit.h | 8 ++-
kernel/audit.c | 75 ++++++++++++++++++++---
kernel/audit.h | 3 +
kernel/audit_fsnotify.c | 5 +-
kernel/audit_tree.c | 5 +-
kernel/audit_watch.c | 33 +++++-----
kernel/auditfilter.c | 52 +++++++++++++++-
kernel/auditsc.c | 145
++++++++++++++++++++++++++++++++++++++++++-- kernel/nsproxy.c |
6 ++
net/core/net_namespace.c | 45 ++++++++++++++
net/netfilter/xt_AUDIT.c | 15 ++++-
17 files changed, 473 insertions(+), 37 deletions(-)
Richard Guy Briggs
2018-05-30 17:33:28 UTC
Permalink
Raw Message
Post by Steve Grubb
Implement audit kernel container ID.
This patchset is a second RFC based on the proposal document (V3)
https://www.redhat.com/archives/linux-audit/2018-January/msg00014.html
So, if you work on a container orchestrator, how exactly is this set of
interfaces to be used and in what order?
It was designed keeping in mind the Virtualization Manager Guest
Lifecycle Events document.
https://github.com/linux-audit/audit-documentation/wiki/SPEC-Virtualization-Manager-Guest-Lifecycle-Events

The orchestrator would start setting things up and when it knows the PID
of the container task but before that task has had a chance to thread or
spawn children it registers the audit container ID via the /proc
interface. After that, it consults audit for any events matching that
ID.
Post by Steve Grubb
Thanks,
-Steve
The first patch implements the proc fs write to set the audit container
ID of a process, emitting an AUDIT_CONTAINER record to announce the
registration of that container ID on that process. This patch requires
userspace support for record acceptance and proper type display.
The second checks for children or co-threads and refuses to set the
container ID if either are present. (This policy could be changed to
set both with the same container ID provided they meet the rest of the
requirements.)
The third implements the auxiliary record AUDIT_CONTAINER_INFO if a
container ID is identifiable with an event. This patch requires
userspace support for proper type display.
The fourth adds container ID filtering to the exit, exclude and user
lists. This patch requires auditctl userspace support for the
--containerid option.
The 5th adds signal and ptrace support.
The 6th creates a local audit context to be able to bind a standalone
record with a locally created auxiliary record.
The 7th, 8th, 9th, 10th patches add container ID records to standalone
records. Some of these may end up being syscall auxiliary records and
won't need this specific support since they'll be supported via
syscalls.
The 11th adds network namespace container ID labelling based on member
tasks' container ID labels.
The 12th adds container ID support to standalone netfilter records that
don't have a task context and lists each container to which that net
namespace belongs.
The 13th implements reading the container ID from the proc filesystem
for debugging. This patch isn't planned for upstream inclusion.
Feedback please!
sleep 2&
child=$!
echo 123456 > /proc/$child/containerid; echo $?
ausearch -ts recent -m container
echo child:$child contid:$( cat /proc/$child/containerid)
type=CONTAINER msg=audit(1521122590.315:222): op=set pid=689 uid=0
subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 auid=0 tty=pts0
ses=3 opid=707 old-contid=18446744073709551615 contid=123456 res=1
containerid=123459
key=tmpcontainerid
auditctl -a exit,always -F dir=/tmp -F perm=wa -F containerid=$containerid
-F key=$key perl -e "sleep 1; open(my \$tmpfile, '>', \"/tmp/$key\");
close(\$tmpfile);" & child=$!
echo $containerid > /proc/$child/containerid
sleep 2
ausearch -i -ts recent -k $key
auditctl -d exit,always -F dir=/tmp -F perm=wa -F containerid=$containerid
-F key=$key rm -f /tmp/$key
type=CONTAINER_INFO msg=audit(1521122591.614:227): op=task contid=123459
proctitle=7065726C002D6500736C65657020313B206F70656E286D792024746D7066696C
652C20273E272C20222F746D702F746D70636F6E7461696E6572696422293B20636C6F73652
824746D7066696C65293B type=PATH msg=audit(1521122591.614:227): item=1
name="/tmp/tmpcontainerid" inode=18427 dev=00:26 mode=0100644 ouid=0
ogid=0 rdev=00:00 obj=unconfined_u:object_r:user_tmp_t:s0 nametype=CREATE
cap_fp=0000000000000000 cap_fi=0000000000000000 cap_fe=0 cap_fver=0
type=PATH msg=audit(1521122591.614:227): item=0 name="/tmp/" inode=13513
dev=00:26 mode=041777 ouid=0 ogid=0 rdev=00:00
obj=system_u:object_r:tmp_t:s0 nametype=PARENT cap_fp=0000000000000000
cap_fi=0000000000000000 cap_fe=0 cap_fver=0 type=CWD
msg=audit(1521122591.614:227): cwd="/root"
type=SYSCALL msg=audit(1521122591.614:227): arch=c000003e syscall=257
success=yes exit=3 a0=ffffffffffffff9c a1=55db90a28900 a2=241 a3=1b6
items=2 ppid=689 pid=724 auid=0 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0
sgid=0 fsgid=0 tty=pts0 ses=3 comm="perl" exe="/usr/bin/perl"
subj=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023
key="tmpcontainerid"
https://github.com/linux-audit/audit-kernel/issues/32
https://github.com/linux-audit/audit-userspace/issues/40
https://github.com/linux-audit/audit-testsuite/issues/64
audit: add container id
audit: check children and threading before allowing containerid
audit: log container info of syscalls
audit: add containerid filtering
audit: add containerid support for ptrace and signals
audit: add support for non-syscall auxiliary records
audit: add container aux record to watch/tree/mark
audit: add containerid support for tty_audit
audit: add containerid support for config/feature/user records
audit: add containerid support for seccomp and anom_abend records
audit: add support for containerid to network namespaces
audit: NETFILTER_PKT: record each container ID associated with a netNS
debug audit: read container ID of a process
drivers/tty/tty_audit.c | 5 +-
fs/proc/base.c | 53 ++++++++++++++++
include/linux/audit.h | 43 +++++++++++++
include/linux/init_task.h | 4 +-
include/linux/sched.h | 1 +
include/net/net_namespace.h | 12 ++++
include/uapi/linux/audit.h | 8 ++-
kernel/audit.c | 75 ++++++++++++++++++++---
kernel/audit.h | 3 +
kernel/audit_fsnotify.c | 5 +-
kernel/audit_tree.c | 5 +-
kernel/audit_watch.c | 33 +++++-----
kernel/auditfilter.c | 52 +++++++++++++++-
kernel/auditsc.c | 145 ++++++++++++++++++++++++++++++++++++++++++--
kernel/nsproxy.c | 6 ++
net/core/net_namespace.c | 45 ++++++++++++++
net/netfilter/xt_AUDIT.c | 15 ++++-
17 files changed, 473 insertions(+), 37 deletions(-)
- RGB

--
Richard Guy Briggs <***@redhat.com>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635
Loading...