* published by the Free Software Foundation.
*/
-/* #define DEBUG */
-/* #define IDEBUG */
-/* #define MDEBUG */
-/* #define RDEBUG */
-/* #define CDEBUG */
-
-/* Iface handling */
-#ifdef IDEBUG
-#define IF_DEBUG(...) pr_debug(__VA_ARGS__)
-#else
-#define IF_DEBUG(...) no_printk(__VA_ARGS__)
-#endif
-/* Iptable Matching */
-#ifdef MDEBUG
-#define MT_DEBUG(...) pr_debug(__VA_ARGS__)
-#else
-#define MT_DEBUG(...) no_printk(__VA_ARGS__)
-#endif
-/* Red-black tree handling */
-#ifdef RDEBUG
-#define RB_DEBUG(...) pr_debug(__VA_ARGS__)
-#else
-#define RB_DEBUG(...) no_printk(__VA_ARGS__)
-#endif
-/* procfs ctrl/stats handling */
-#ifdef CDEBUG
-#define CT_DEBUG(...) pr_debug(__VA_ARGS__)
-#else
-#define CT_DEBUG(...) no_printk(__VA_ARGS__)
-#endif
+/*
+ * Run-time debug flags are enabled via the debug_mask module param, falling
+ * back to DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
+ */
+#define DEBUG
#include <linux/file.h>
#include <linux/inetdevice.h>
#include <net/udp.h>
#include <linux/netfilter/xt_socket.h>
+#include "xt_qtaguid_internal.h"
+#include "xt_qtaguid_print.h"
+
/*
* We only use the xt_socket funcs within a similar context to avoid unexpected
* return values.
S_IRUGO | S_IWUSR);
/*
+ * Limit the number of active tags (via socket tags) for a given UID.
+ * Multiple processes could share the UID.
+ */
+static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
+module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
+
+/*
 * After the kernel has initialized this module, it is still possible
- * to make it passive:
- * - do not register it via iptables.
- * the matching code will not be invoked.
- * - set passive to 0
- * the iface stats handling will not be act on notifications.
+ * to make it passive.
+ * Setting passive to Y:
+ * - the iface stats handling will not act on notifications.
+ * - iptables matches will never match.
+ * - ctrl commands silently succeed.
+ * - stats are always empty.
 * This is mostly useful when a bug is suspected.
*/
static bool module_passive;
module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
-/*---------------------------------------------------------------------------*/
/*
- * Tags:
- *
- * They represent what the data usage counters will be tracked against.
- * By default a tag is just based on the UID.
- * The UID is used as the base for policying, and can not be ignored.
- * So a tag will always at least represent a UID (uid_tag).
- *
- * A tag can be augmented with an "accounting tag" which is associated
- * with a UID.
- * User space can set the acct_tag portion of the tag which is then used
- * with sockets: all data belong to that socket will be counted against the
- * tag. The policing is then based on the tag's uid_tag portion,
- * and stats are collected for the acct_tag portion seperately.
- *
- * There could be
- * a: {acct_tag=1, uid_tag=10003}
- * b: {acct_tag=2, uid_tag=10003}
- * c: {acct_tag=3, uid_tag=10003}
- * d: {acct_tag=0, uid_tag=10003}
- * (a, b, and c represent tags associated with specific sockets.
- * d is for the totals for that uid, including all untagged traffic.
- * Typically d is used with policing/quota rules.
- *
- * We want tag_t big enough to distinguish uid_t and acct_tag.
- * It might become a struct if needed.
- * Nothing should be using it as an int.
+ * Control how qtaguid data is tracked per proc/uid.
+ * Setting tag_tracking_passive to Y:
+ * - don't create proc-specific structs to track tags.
+ * - don't check that the number of active tags stays within limits.
+ * - don't clean up socket tags on process exit.
+ * This is mostly useful when a bug is suspected.
*/
-typedef uint64_t tag_t; /* Only used via accessors */
+static bool qtu_proc_handling_passive;
+module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
+ S_IRUGO | S_IWUSR);
+
+#define QTU_DEV_NAME "xt_qtaguid"
+
+uint debug_mask = DEFAULT_DEBUG_MASK;
+module_param(debug_mask, uint, S_IRUGO | S_IWUSR);
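At runtime, the parameters declared with S_IWUSR above (passive,
tag_tracking_passive, debug_mask) can be flipped through the usual
module-parameter files, e.g. (assuming the standard sysfs path; the individual
mask bits are defined in xt_qtaguid_internal.h):

	echo 0xff > /sys/module/xt_qtaguid/parameters/debug_mask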
+
+/*---------------------------------------------------------------------------*/
static const char *iface_stat_procdirname = "iface_stat";
static struct proc_dir_entry *iface_stat_procdir;
-
/*
- * For now we only track 2 sets of counters.
- * The default set is 0.
- * Userspace can activate another set for a given uid being tracked.
+ * Ordering of locks:
+ * outer locks:
+ * iface_stat_list_lock
+ * sock_tag_list_lock
+ * inner locks:
+ * uid_tag_data_tree_lock
+ * tag_counter_set_list_lock
+ * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock
+ * is acquired.
+ *
+ * Call tree with all lock holders as of 2011-09-06:
+ *
+ * qtaguid_ctrl_parse()
+ * ctrl_cmd_delete()
+ * sock_tag_list_lock
+ * tag_counter_set_list_lock
+ * iface_stat_list_lock
+ * iface_entry->tag_stat_list_lock
+ * uid_tag_data_tree_lock
+ * ctrl_cmd_counter_set()
+ * tag_counter_set_list_lock
+ * ctrl_cmd_tag()
+ * sock_tag_list_lock
+ * get_tag_ref()
+ * uid_tag_data_tree_lock
+ * uid_tag_data_tree_lock
+ * ctrl_cmd_untag()
+ * sock_tag_list_lock
+ * uid_tag_data_tree_lock
+ *
+ * qtaguid_mt()
+ * account_for_uid()
+ * if_tag_stat_update()
+ * get_sock_stat()
+ * sock_tag_list_lock
+ * iface_entry->tag_stat_list_lock
+ * tag_stat_update()
+ * get_active_counter_set()
+ * tag_counter_set_list_lock
+ *
+ * iface_netdev_event_handler()
+ * iface_stat_create()
+ * iface_stat_list_lock
+ * iface_stat_update()
+ * iface_stat_list_lock
+ *
+ * iface_inet6addr_event_handler()
+ * iface_stat_create_ipv6()
+ * iface_stat_list_lock
+ * iface_stat_update()
+ * iface_stat_list_lock
+ *
+ * iface_inetaddr_event_handler()
+ * iface_stat_create()
+ * iface_stat_list_lock
+ * iface_stat_update()
+ * iface_stat_list_lock
+ *
+ * qtaguid_ctrl_proc_read()
+ * sock_tag_list_lock
+ * sock_tag_list_lock
+ * uid_tag_data_tree_lock
+ * iface_stat_list_lock
+ *
+ * qtaguid_stats_proc_read()
+ * iface_stat_list_lock
+ * iface_entry->tag_stat_list_lock
+ *
+ * qtudev_open()
+ * uid_tag_data_tree_lock
+ *
+ * qtud_dev_release()
+ * sock_tag_list_lock
+ * uid_tag_data_tree_lock
*/
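A minimal sketch of the documented nesting, mirroring the ctrl_cmd_tag() path
above (illustrative only; the real call sites interleave tree operations
between the lock calls):

	spin_lock_bh(&sock_tag_list_lock);	/* outer lock */
	spin_lock_bh(&uid_tag_data_tree_lock);	/* inner lock */
	/* ... update sock_tag_tree and uid_tag_data_tree ... */
	spin_unlock_bh(&uid_tag_data_tree_lock);
	spin_unlock_bh(&sock_tag_list_lock);

Both are _bh spinlocks because qtaguid_mt() can run in softirq context.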
-#define IFS_MAX_COUNTER_SETS 2
-
-enum ifs_tx_rx {
- IFS_TX,
- IFS_RX,
- IFS_MAX_DIRECTIONS
-};
-
-/* For now, TCP, UDP, the rest */
-enum ifs_proto {
- IFS_TCP,
- IFS_UDP,
- IFS_PROTO_OTHER,
- IFS_MAX_PROTOS
-};
-
-struct byte_packet_counters {
- uint64_t bytes;
- uint64_t packets;
-};
-
-struct data_counters {
- struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
-};
-
-/* Generic tag based node used as a base for rb_tree ops. */
-struct tag_node {
- struct rb_node node;
- tag_t tag;
-};
-
-struct tag_stat {
- struct tag_node tn;
- struct data_counters counters;
- /*
- * If this tag is acct_tag based, we need to count against the
- * matching parent uid_tag.
- */
- struct data_counters *parent_counters;
-};
-
-struct iface_stat {
- struct list_head list;
- char *ifname;
- uint64_t rx_bytes;
- uint64_t rx_packets;
- uint64_t tx_bytes;
- uint64_t tx_packets;
- bool active;
- struct proc_dir_entry *proc_ptr;
-
- struct rb_root tag_stat_tree;
- spinlock_t tag_stat_list_lock;
-};
-
static LIST_HEAD(iface_stat_list);
static DEFINE_SPINLOCK(iface_stat_list_lock);
-/* This is needed to create proc_dir_entries from atomic context. */
-struct iface_stat_work {
- struct work_struct iface_work;
- struct iface_stat *iface_entry;
-};
-
-/*
- * Track tag that this socket is transferring data for, and not necessarily
- * the uid that owns the socket.
- * This is the tag against which tag_stat.counters will be billed.
- */
-struct sock_tag {
- struct rb_node sock_node;
- struct sock *sk; /* Only used as a number, never dereferenced */
- /* The socket is needed for sockfd_put() */
- struct socket *socket;
-
- tag_t tag;
-};
-
-struct qtaguid_event_counts {
- /* Various successful events */
- atomic64_t sockets_tagged;
- atomic64_t sockets_untagged;
- atomic64_t counter_set_changes;
- atomic64_t delete_cmds;
- atomic64_t iface_events; /* Number of NETDEV_* events handled */
- /*
- * match_found_sk_*: numbers related to the netfilter matching
- * function finding a sock for the sk_buff.
- */
- atomic64_t match_found_sk; /* An sk was already in the sk_buff. */
- /* The connection tracker had the sk. */
- atomic64_t match_found_sk_in_ct;
- /*
- * No sk could be found. No apparent owner. Could happen with
- * unsolicited traffic.
- */
- atomic64_t match_found_sk_none;
-};
-static struct qtaguid_event_counts qtu_events;
-
static struct rb_root sock_tag_tree = RB_ROOT;
static DEFINE_SPINLOCK(sock_tag_list_lock);
-/* Track the set active_set for the given tag. */
-struct tag_counter_set {
- struct tag_node tn;
- int active_set;
-};
-
static struct rb_root tag_counter_set_tree = RB_ROOT;
static DEFINE_SPINLOCK(tag_counter_set_list_lock);
-static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par);
+static struct rb_root uid_tag_data_tree = RB_ROOT;
+static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
+static struct rb_root proc_qtu_data_tree = RB_ROOT;
+/* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
+
+static struct qtaguid_event_counts qtu_events;
/*----------------------------------------------*/
-static inline int tag_compare(tag_t t1, tag_t t2)
+static bool can_manipulate_uids(void)
{
- return t1 < t2 ? -1 : t1 == t2 ? 0 : 1;
+ /* root pwnd */
+ return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid)
+ || in_egroup_p(proc_ctrl_write_gid);
}
-static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
-{
- return acct_tag | uid;
-}
-static inline tag_t make_tag_from_uid(uid_t uid)
-{
- return uid;
-}
-static inline uid_t get_uid_from_tag(tag_t tag)
-{
- return tag & 0xFFFFFFFFULL;
-}
-static inline tag_t get_utag_from_tag(tag_t tag)
-{
- return tag & 0xFFFFFFFFULL;
-}
-static inline tag_t get_atag_from_tag(tag_t tag)
+static bool can_impersonate_uid(uid_t uid)
{
- return tag & ~0xFFFFFFFFULL;
+ return uid == current_fsuid() || can_manipulate_uids();
}
-static inline bool valid_atag(tag_t tag)
+static bool can_read_other_uid_stats(uid_t uid)
{
- return !(tag & 0xFFFFFFFFULL);
+ /* root pwnd */
+ return unlikely(!current_fsuid()) || uid == current_fsuid()
+ || unlikely(!proc_stats_readall_gid)
+ || in_egroup_p(proc_stats_readall_gid);
}
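The tag_t accessors deleted above move into xt_qtaguid_internal.h. A rough
sketch of the layout they encode, consistent with the removed definitions (the
header is not part of this hunk; make_atag_from_value() is inferred from
valid_atag() requiring the low 32 bits to be zero):

	typedef uint64_t tag_t;	/* Only used via accessors */

	/* Low 32 bits hold the uid_tag, high 32 bits the acct_tag. */
	static inline tag_t make_atag_from_value(uint64_t value)
	{
		return value << 32;
	}
	static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
	{
		return acct_tag | uid;
	}
	static inline uid_t get_uid_from_tag(tag_t tag)
	{
		return tag & 0xFFFFFFFFULL;
	}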
static inline void dc_add_byte_packets(struct data_counters *counters, int set,
while (node) {
struct tag_node *data = rb_entry(node, struct tag_node, node);
- int result = tag_compare(tag, data->tag);
- RB_DEBUG("qtaguid: tag_node_tree_search(): tag=0x%llx"
- " (uid=%d)\n",
- data->tag,
- get_uid_from_tag(data->tag));
-
+ int result;
+ RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
+ " node=%p data=%p\n", tag, node, data);
+ result = tag_compare(tag, data->tag);
+ RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
+ " data.tag=0x%llx (uid=%u) res=%d\n",
+ tag, data->tag, get_uid_from_tag(data->tag), result);
if (result < 0)
node = node->rb_left;
else if (result > 0)
struct tag_node *this = rb_entry(*new, struct tag_node,
node);
int result = tag_compare(data->tag, this->tag);
- RB_DEBUG("qtaguid: tag_node_tree_insert(): tag=0x%llx"
- " (uid=%d)\n",
+ RB_DEBUG("qtaguid: %s(): tag=0x%llx"
+ " (uid=%u)\n", __func__,
this->tag,
get_uid_from_tag(this->tag));
parent = *new;
}
+static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
+{
+ tag_node_tree_insert(&data->tn, root);
+}
+
+static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
+{
+ struct tag_node *node = tag_node_tree_search(root, tag);
+ if (!node)
+ return NULL;
+ return rb_entry(&node->node, struct tag_ref, tn.node);
+}
+
static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
const struct sock *sk)
{
while (node) {
struct sock_tag *data = rb_entry(node, struct sock_tag,
sock_node);
- ptrdiff_t result = sk - data->sk;
- if (result < 0)
+ if (sk < data->sk)
node = node->rb_left;
- else if (result > 0)
+ else if (sk > data->sk)
node = node->rb_right;
else
return data;
while (*new) {
struct sock_tag *this = rb_entry(*new, struct sock_tag,
sock_node);
- ptrdiff_t result = data->sk - this->sk;
parent = *new;
- if (result < 0)
+ if (data->sk < this->sk)
new = &((*new)->rb_left);
- else if (result > 0)
+ else if (data->sk > this->sk)
new = &((*new)->rb_right);
else
BUG();
rb_insert_color(&data->sock_node, root);
}
+static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
+{
+ struct rb_node *node;
+ struct sock_tag *st_entry;
+
+ node = rb_first(st_to_free_tree);
+ while (node) {
+ st_entry = rb_entry(node, struct sock_tag, sock_node);
+ node = rb_next(node);
+ CT_DEBUG("qtaguid: %s(): "
+ "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
+ st_entry->sk,
+ st_entry->tag,
+ get_uid_from_tag(st_entry->tag));
+ rb_erase(&st_entry->sock_node, st_to_free_tree);
+ sockfd_put(st_entry->socket);
+ kfree(st_entry);
+ }
+}
+
+static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
+ const pid_t pid)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct proc_qtu_data *data = rb_entry(node,
+ struct proc_qtu_data,
+ node);
+ if (pid < data->pid)
+ node = node->rb_left;
+ else if (pid > data->pid)
+ node = node->rb_right;
+ else
+ return data;
+ }
+ return NULL;
+}
+
+static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
+ struct rb_root *root)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct proc_qtu_data *this = rb_entry(*new,
+ struct proc_qtu_data,
+ node);
+ parent = *new;
+ if (data->pid < this->pid)
+ new = &((*new)->rb_left);
+ else if (data->pid > this->pid)
+ new = &((*new)->rb_right);
+ else
+ BUG();
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+}
+
+static void uid_tag_data_tree_insert(struct uid_tag_data *data,
+ struct rb_root *root)
+{
+ struct rb_node **new = &(root->rb_node), *parent = NULL;
+
+ /* Figure out where to put new node */
+ while (*new) {
+ struct uid_tag_data *this = rb_entry(*new,
+ struct uid_tag_data,
+ node);
+ parent = *new;
+ if (data->uid < this->uid)
+ new = &((*new)->rb_left);
+ else if (data->uid > this->uid)
+ new = &((*new)->rb_right);
+ else
+ BUG();
+ }
+
+ /* Add new node and rebalance tree. */
+ rb_link_node(&data->node, parent, new);
+ rb_insert_color(&data->node, root);
+}
+
+static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
+ uid_t uid)
+{
+ struct rb_node *node = root->rb_node;
+
+ while (node) {
+ struct uid_tag_data *data = rb_entry(node,
+ struct uid_tag_data,
+ node);
+ if (uid < data->uid)
+ node = node->rb_left;
+ else if (uid > data->uid)
+ node = node->rb_right;
+ else
+ return data;
+ }
+ return NULL;
+}
+
+/*
+ * Looks up the uid_tag_data for the given UID, allocating a new struct
+ * if needed.
+ * Returns a pointer to the found or newly allocated uid_tag_data.
+ * Returns a PTR_ERR on allocation failure.
+ * Does not take uid_tag_data_tree_lock; callers handle locking.
+ * If found_res is not NULL:
+ *   sets *found_res to true if the entry already existed,
+ *   sets *found_res to false if it had to be allocated.
+ */
+struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
+{
+ struct uid_tag_data *utd_entry;
+
+ /* Look for top level uid_tag_data for the UID */
+ utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
+ DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
+
+ if (found_res)
+ *found_res = utd_entry;
+ if (utd_entry)
+ return utd_entry;
+
+ utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
+ if (!utd_entry) {
+ pr_err("qtaguid: get_uid_data(%u): "
+ "tag data alloc failed\n", uid);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ utd_entry->uid = uid;
+ utd_entry->tag_ref_tree = RB_ROOT;
+ uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
+ DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
+ return utd_entry;
+}
+
+/* Never returns NULL. Either PTR_ERR or a valid ptr. */
+static struct tag_ref *new_tag_ref(tag_t new_tag,
+ struct uid_tag_data *utd_entry)
+{
+ struct tag_ref *tr_entry;
+ int res;
+
+ if (utd_entry->num_active_tags + 1 > max_sock_tags) {
+ pr_info("qtaguid: new_tag_ref(0x%llx): "
+ "tag ref alloc quota exceeded. max=%d\n",
+ new_tag, max_sock_tags);
+ res = -EMFILE;
+ goto err_res;
+	}
+
+ tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
+ if (!tr_entry) {
+ pr_err("qtaguid: new_tag_ref(0x%llx): "
+ "tag ref alloc failed\n",
+ new_tag);
+ res = -ENOMEM;
+ goto err_res;
+ }
+ tr_entry->tn.tag = new_tag;
+ /* tr_entry->num_sock_tags handled by caller */
+ utd_entry->num_active_tags++;
+ tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
+ DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
+ " inserted new tag ref\n",
+ new_tag);
+ return tr_entry;
+
+err_res:
+ return ERR_PTR(res);
+}
+
+static struct tag_ref *lookup_tag_ref(tag_t full_tag,
+ struct uid_tag_data **utd_res)
+{
+ struct uid_tag_data *utd_entry;
+ struct tag_ref *tr_entry;
+ bool found_utd;
+ uid_t uid = get_uid_from_tag(full_tag);
+
+ DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
+ full_tag, uid);
+
+ utd_entry = get_uid_data(uid, &found_utd);
+ if (IS_ERR_OR_NULL(utd_entry)) {
+ if (utd_res)
+ *utd_res = utd_entry;
+ return NULL;
+ }
+
+ tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
+ if (utd_res)
+ *utd_res = utd_entry;
+ DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
+ full_tag, utd_entry, tr_entry);
+ return tr_entry;
+}
+
+/* Never returns NULL. Either PTR_ERR or a valid ptr. */
+static struct tag_ref *get_tag_ref(tag_t full_tag,
+ struct uid_tag_data **utd_res)
+{
+ struct uid_tag_data *utd_entry;
+ struct tag_ref *tr_entry;
+
+ DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
+ full_tag);
+ spin_lock_bh(&uid_tag_data_tree_lock);
+ tr_entry = lookup_tag_ref(full_tag, &utd_entry);
+ BUG_ON(IS_ERR_OR_NULL(utd_entry));
+ if (!tr_entry)
+ tr_entry = new_tag_ref(full_tag, utd_entry);
+
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+ if (utd_res)
+ *utd_res = utd_entry;
+ DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
+ full_tag, utd_entry, tr_entry);
+ return tr_entry;
+}
+
+/* Checks and maybe frees the UID Tag Data entry */
+static void put_utd_entry(struct uid_tag_data *utd_entry)
+{
+ /* Are we done with the UID tag data entry? */
+ if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree)) {
+ DR_DEBUG("qtaguid: %s(): "
+ "erase utd_entry=%p uid=%u "
+ "by pid=%u tgid=%u uid=%u\n", __func__,
+ utd_entry, utd_entry->uid,
+ current->pid, current->tgid, current_fsuid());
+ BUG_ON(utd_entry->num_active_tags);
+ rb_erase(&utd_entry->node, &uid_tag_data_tree);
+ kfree(utd_entry);
+ } else {
+ DR_DEBUG("qtaguid: %s(): "
+ "utd_entry=%p still has %d tags\n", __func__,
+ utd_entry, utd_entry->num_active_tags);
+ BUG_ON(!utd_entry->num_active_tags);
+ }
+}
+
+/*
+ * If no sock_tags are using this tag_ref,
+ * decrements the refcount of utd_entry, removes tr_entry
+ * from utd_entry->tag_ref_tree, and frees it.
+ */
+static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
+ struct uid_tag_data *utd_entry)
+{
+ DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
+ tr_entry, tr_entry->tn.tag,
+ get_uid_from_tag(tr_entry->tn.tag));
+ if (!tr_entry->num_sock_tags) {
+ BUG_ON(!utd_entry->num_active_tags);
+ utd_entry->num_active_tags--;
+ rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
+ DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
+ kfree(tr_entry);
+ }
+}
+
+static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
+{
+ struct rb_node *node;
+ struct tag_ref *tr_entry;
+ tag_t acct_tag;
+
+ DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
+ full_tag, get_uid_from_tag(full_tag));
+ acct_tag = get_atag_from_tag(full_tag);
+ node = rb_first(&utd_entry->tag_ref_tree);
+ while (node) {
+ tr_entry = rb_entry(node, struct tag_ref, tn.node);
+ node = rb_next(node);
+ if (!acct_tag || tr_entry->tn.tag == full_tag)
+ free_tag_ref_from_utd_entry(tr_entry, utd_entry);
+ }
+}
+
static int read_proc_u64(char *page, char **start, off_t off,
int count, int *eof, void *data)
{
tag_t tag)
{
struct tag_stat *new_tag_stat_entry = NULL;
- IF_DEBUG("qtaguid: iface_stat: create_if_tag_stat(): ife=%p tag=0x%llx"
- " (uid=%u)\n",
+ IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
+ " (uid=%u)\n", __func__,
iface_entry, tag, get_uid_from_tag(tag));
new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
if (!new_tag_stat_entry) {
acct_tag = get_atag_from_tag(tag);
uid_tag = get_utag_from_tag(tag);
} else {
- uid_tag = make_tag_from_uid(uid);
- acct_tag = 0;
+ acct_tag = make_atag_from_value(0);
tag = combine_atag_with_uid(acct_tag, uid);
+ uid_tag = make_tag_from_uid(uid);
}
MT_DEBUG("qtaguid: iface_stat: stat_update(): "
" looking for tag=0x%llx (uid=%u) in ife=%p\n",
char *outp = page;
int len;
uid_t uid;
- struct sock_tag *sock_tag_entry;
struct rb_node *node;
+ struct sock_tag *sock_tag_entry;
int item_index = 0;
+ int indent_level = 0;
+ long f_count;
if (unlikely(module_passive)) {
*eof = 1;
return 0;
}
- /* TODO: support skipping num_items_returned on entry. */
- CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
- page, items_to_skip, char_count, *eof);
-
if (*eof)
return 0;
+ CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
+ page, items_to_skip, char_count, *eof);
+
spin_lock_bh(&sock_tag_list_lock);
for (node = rb_first(&sock_tag_tree);
node;
continue;
sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
uid = get_uid_from_tag(sock_tag_entry->tag);
- CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u)\n",
+ CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
+ "pid=%u\n",
sock_tag_entry->sk,
sock_tag_entry->tag,
- uid
+ uid,
+ sock_tag_entry->pid
);
+ f_count = atomic_long_read(
+ &sock_tag_entry->socket->file->f_count);
len = snprintf(outp, char_count,
- "sock=%p tag=0x%llx (uid=%u)\n",
- sock_tag_entry->sk, sock_tag_entry->tag, uid);
+ "sock=%p tag=0x%llx (uid=%u) pid=%u "
+ "f_count=%lu\n",
+ sock_tag_entry->sk,
+ sock_tag_entry->tag, uid,
+ sock_tag_entry->pid, f_count);
if (len >= char_count) {
spin_unlock_bh(&sock_tag_list_lock);
*outp = '\0';
(*num_items_returned)++;
}
- *eof = 1;
- return outp - page;
-}
+#ifdef CDEBUG
+ /* Count the following as part of the last item_index */
+ if (item_index > items_to_skip) {
+ CT_DEBUG("qtaguid: proc ctrl state debug {\n");
+ spin_lock_bh(&sock_tag_list_lock);
+ prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
+ spin_unlock_bh(&sock_tag_list_lock);
-static bool can_manipulate_uids(void)
-{
- /* root pwnd */
- return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid)
- || in_egroup_p(proc_ctrl_write_gid);
-}
+ spin_lock_bh(&uid_tag_data_tree_lock);
+ prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
+ prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
+ spin_unlock_bh(&uid_tag_data_tree_lock);
-static bool can_impersonate_uid(uid_t uid)
-{
- return uid == current_fsuid() || can_manipulate_uids();
-}
+ spin_lock_bh(&iface_stat_list_lock);
+ prdebug_iface_stat_list(indent_level, &iface_stat_list);
+ spin_unlock_bh(&iface_stat_list_lock);
-static bool can_read_other_uid_stats(uid_t uid)
-{
- /* root pwnd */
- return unlikely(!current_fsuid()) || uid == current_fsuid()
- || unlikely(!proc_stats_readall_gid)
- || in_egroup_p(proc_stats_readall_gid);
+ CT_DEBUG("qtaguid: proc ctrl state debug }\n");
+	}
+#endif
+
+ *eof = 1;
+ return outp - page;
}
/*
struct rb_root st_to_free_tree = RB_ROOT;
struct tag_stat *ts_entry;
struct tag_counter_set *tcs_entry;
+ struct tag_ref *tr_entry;
+ struct uid_tag_data *utd_entry;
argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
uid = current_fsuid();
} else if (!can_impersonate_uid(uid)) {
pr_info("qtaguid: ctrl_delete(%s): "
- "insufficient priv from pid=%u uid=%u\n",
- input, current->pid, current_fsuid());
+ "insufficient priv from pid=%u tgid=%u uid=%u\n",
+ input, current->pid, current->tgid, current_fsuid());
res = -EPERM;
goto err;
}
+ tag = combine_atag_with_uid(acct_tag, uid);
+ CT_DEBUG("qtaguid: ctrl_delete(): "
+ "looking for tag=0x%llx (uid=%u)\n",
+ tag, uid);
+
/* Delete socket tags */
spin_lock_bh(&sock_tag_list_lock);
node = rb_first(&sock_tag_tree);
if (entry_uid != uid)
continue;
+ CT_DEBUG("qtaguid: ctrl_delete(): st tag=0x%llx (uid=%u)\n",
+ st_entry->tag, entry_uid);
+
if (!acct_tag || st_entry->tag == tag) {
rb_erase(&st_entry->sock_node, &sock_tag_tree);
/* Can't sockfd_put() within spinlock, do it later. */
sock_tag_tree_insert(st_entry, &st_to_free_tree);
+ tr_entry = lookup_tag_ref(st_entry->tag, NULL);
+ BUG_ON(tr_entry->num_sock_tags <= 0);
+ tr_entry->num_sock_tags--;
}
}
spin_unlock_bh(&sock_tag_list_lock);
- node = rb_first(&st_to_free_tree);
- while (node) {
- st_entry = rb_entry(node, struct sock_tag, sock_node);
- node = rb_next(node);
- CT_DEBUG("qtaguid: ctrl_delete(): "
- "erase st: sk=%p tag=0x%llx (uid=%u)\n",
- st_entry->sk,
- st_entry->tag,
- entry_uid);
- rb_erase(&st_entry->sock_node, &st_to_free_tree);
- sockfd_put(st_entry->socket);
- kfree(st_entry);
- }
-
- tag = combine_atag_with_uid(acct_tag, uid);
+ sock_tag_tree_erase(&st_to_free_tree);
/* Delete tag counter-sets */
spin_lock_bh(&tag_counter_set_list_lock);
+ /* Counter sets are only on the uid tag, not full tag */
tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
if (tcs_entry) {
CT_DEBUG("qtaguid: ctrl_delete(): "
ts_entry = rb_entry(node, struct tag_stat, tn.node);
entry_uid = get_uid_from_tag(ts_entry->tn.tag);
node = rb_next(node);
+
+ CT_DEBUG("qtaguid: ctrl_delete(): "
+ "ts tag=0x%llx (uid=%u)\n",
+ ts_entry->tn.tag, entry_uid);
+
if (entry_uid != uid)
continue;
if (!acct_tag || ts_entry->tn.tag == tag) {
spin_unlock_bh(&iface_entry->tag_stat_list_lock);
}
spin_unlock_bh(&iface_stat_list_lock);
+
+ /* Cleanup the uid_tag_data */
+ spin_lock_bh(&uid_tag_data_tree_lock);
+ node = rb_first(&uid_tag_data_tree);
+ while (node) {
+ utd_entry = rb_entry(node, struct uid_tag_data, node);
+ entry_uid = utd_entry->uid;
+ node = rb_next(node);
+
+ CT_DEBUG("qtaguid: ctrl_delete(): "
+ "utd uid=%u\n",
+ entry_uid);
+
+ if (entry_uid != uid)
+ continue;
+ /*
+ * Go over the tag_refs, and those that don't have
+ * sock_tags using them are freed.
+ */
+ put_tag_ref_tree(tag, utd_entry);
+ put_utd_entry(utd_entry);
+ }
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+
atomic64_inc(&qtu_events.delete_cmds);
res = 0;
}
if (!can_manipulate_uids()) {
pr_info("qtaguid: ctrl_counterset(%s): "
- "insufficient priv from pid=%u uid=%u\n",
- input, current->pid, current_fsuid());
+ "insufficient priv from pid=%u tgid=%u uid=%u\n",
+ input, current->pid, current->tgid, current_fsuid());
res = -EPERM;
goto err;
}
char cmd;
int sock_fd = 0;
uid_t uid = 0;
- tag_t acct_tag = 0;
+ tag_t acct_tag = make_atag_from_value(0);
+ tag_t full_tag;
struct socket *el_socket;
- int refcnt = -1;
int res, argc;
struct sock_tag *sock_tag_entry;
+ struct tag_ref *tag_ref_entry;
+ struct uid_tag_data *uid_tag_data_entry;
+ struct proc_qtu_data *pqd_entry;
/* Unassigned args will get defaulted later. */
argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
" sock_fd=%d err=%d\n", input, sock_fd, res);
goto err;
}
- refcnt = atomic_read(&el_socket->file->f_count);
- CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%d\n",
- input, refcnt);
+ CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
+ input, atomic_long_read(&el_socket->file->f_count),
+ el_socket->sk);
if (argc < 3) {
- acct_tag = 0;
+ acct_tag = make_atag_from_value(0);
} else if (!valid_atag(acct_tag)) {
pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
res = -EINVAL;
goto err_put;
}
CT_DEBUG("qtaguid: ctrl_tag(%s): "
- "uid=%u euid=%u fsuid=%u "
+ "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
"in_group=%d in_egroup=%d\n",
- input, current_uid(), current_euid(), current_fsuid(),
- in_group_p(proc_stats_readall_gid),
- in_egroup_p(proc_stats_readall_gid));
+ input, current->pid, current->tgid, current_uid(),
+ current_euid(), current_fsuid(),
+ in_group_p(proc_ctrl_write_gid),
+ in_egroup_p(proc_ctrl_write_gid));
if (argc < 4) {
uid = current_fsuid();
} else if (!can_impersonate_uid(uid)) {
pr_info("qtaguid: ctrl_tag(%s): "
- "insufficient priv from pid=%u uid=%u\n",
- input, current->pid, current_fsuid());
+ "insufficient priv from pid=%u tgid=%u uid=%u\n",
+ input, current->pid, current->tgid, current_fsuid());
res = -EPERM;
goto err_put;
}
+ full_tag = combine_atag_with_uid(acct_tag, uid);
spin_lock_bh(&sock_tag_list_lock);
sock_tag_entry = get_sock_stat_nl(el_socket->sk);
+ tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
+ if (IS_ERR(tag_ref_entry)) {
+ res = PTR_ERR(tag_ref_entry);
+ spin_unlock_bh(&sock_tag_list_lock);
+ goto err_put;
+ }
+ tag_ref_entry->num_sock_tags++;
if (sock_tag_entry) {
+ struct tag_ref *prev_tag_ref_entry;
+
+ CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
+ "st@%p ...->f_count=%ld\n",
+ input, el_socket->sk, sock_tag_entry,
+ atomic_long_read(&el_socket->file->f_count));
/*
* This is a re-tagging, so release the sock_fd that was
* locked at the time of the 1st tagging.
+ * There is still the ref from this call's sockfd_lookup() so
+ * it can be done within the spinlock.
*/
sockfd_put(sock_tag_entry->socket);
- refcnt--;
- sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
- uid);
+ prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
+ &uid_tag_data_entry);
+ BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
+ BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
+ prev_tag_ref_entry->num_sock_tags--;
+ sock_tag_entry->tag = full_tag;
} else {
+ CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
+ input, el_socket->sk);
sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
GFP_ATOMIC);
if (!sock_tag_entry) {
input);
spin_unlock_bh(&sock_tag_list_lock);
res = -ENOMEM;
- goto err_put;
+ goto err_tag_unref_put;
}
sock_tag_entry->sk = el_socket->sk;
sock_tag_entry->socket = el_socket;
+ sock_tag_entry->pid = current->tgid;
sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
uid);
+ spin_lock_bh(&uid_tag_data_tree_lock);
+ pqd_entry = proc_qtu_data_tree_search(
+ &proc_qtu_data_tree, current->tgid);
+ /* TODO: remove if() test, do BUG_ON() */
+ WARN_ON(IS_ERR_OR_NULL(pqd_entry));
+ if (!IS_ERR_OR_NULL(pqd_entry)) {
+ list_add(&sock_tag_entry->list,
+ &pqd_entry->sock_tag_list);
+ }
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+
sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
atomic64_inc(&qtu_events.sockets_tagged);
}
spin_unlock_bh(&sock_tag_list_lock);
/* We keep the ref to the socket (file) until it is untagged */
- CT_DEBUG("qtaguid: ctrl_tag(%s): done. socket->...->f_count=%d\n",
- input,
- el_socket ? atomic_read(&el_socket->file->f_count) : -1);
+ CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
+ input, sock_tag_entry,
+ atomic_long_read(&el_socket->file->f_count));
return 0;
+err_tag_unref_put:
+ BUG_ON(tag_ref_entry->num_sock_tags <= 0);
+ tag_ref_entry->num_sock_tags--;
+ free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
err_put:
+ CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
+ input, atomic_long_read(&el_socket->file->f_count) - 1);
/* Release the sock_fd that was grabbed by sockfd_lookup(). */
sockfd_put(el_socket);
- refcnt--;
+ return res;
+
err:
- CT_DEBUG("qtaguid: ctrl_tag(%s): done. socket->...->f_count=%d\n",
- input, refcnt);
+ CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
return res;
}
char cmd;
int sock_fd = 0;
struct socket *el_socket;
- int refcnt = -1;
int res, argc;
struct sock_tag *sock_tag_entry;
+ struct tag_ref *tag_ref_entry;
+ struct uid_tag_data *utd_entry;
+ struct proc_qtu_data *pqd_entry;
argc = sscanf(input, "%c %d", &cmd, &sock_fd);
CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
" sock_fd=%d err=%d\n", input, sock_fd, res);
goto err;
}
- refcnt = atomic_read(&el_socket->file->f_count);
- CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%d\n",
- input, refcnt);
+ CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
+ input, atomic_long_read(&el_socket->file->f_count),
+ el_socket->sk);
spin_lock_bh(&sock_tag_list_lock);
sock_tag_entry = get_sock_stat_nl(el_socket->sk);
if (!sock_tag_entry) {
*/
rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
+ tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
+ BUG_ON(!tag_ref_entry);
+ BUG_ON(tag_ref_entry->num_sock_tags <= 0);
+ spin_lock_bh(&uid_tag_data_tree_lock);
+ pqd_entry = proc_qtu_data_tree_search(
+ &proc_qtu_data_tree, current->tgid);
+ /* TODO: remove if() test, do BUG_ON() */
+ WARN_ON(IS_ERR_OR_NULL(pqd_entry));
+ if (!IS_ERR_OR_NULL(pqd_entry))
+ list_del(&sock_tag_entry->list);
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+ /*
+ * We don't free tag_ref from the utd_entry here,
+ * only during a cmd_delete().
+ */
+ tag_ref_entry->num_sock_tags--;
+ spin_unlock_bh(&sock_tag_list_lock);
/*
* Release the sock_fd that was grabbed at tag time,
* and once more for the sockfd_lookup() here.
*/
sockfd_put(sock_tag_entry->socket);
- spin_unlock_bh(&sock_tag_list_lock);
+ CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
+ input, sock_tag_entry,
+ atomic_long_read(&el_socket->file->f_count) - 1);
sockfd_put(el_socket);
- refcnt -= 2;
+
kfree(sock_tag_entry);
atomic64_inc(&qtu_events.sockets_untagged);
- CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%d\n",
- input, refcnt);
return 0;
err_put:
+ CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
+ input, atomic_long_read(&el_socket->file->f_count) - 1);
/* Release the sock_fd that was grabbed by sockfd_lookup(). */
sockfd_put(el_socket);
- refcnt--;
+ return res;
+
err:
- CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%d\n",
- input, refcnt);
+ CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
return res;
}
if (!can_read_other_uid_stats(stat_uid)) {
CT_DEBUG("qtaguid: stats line: "
- "%s 0x%llx %u: "
- "insufficient priv from pid=%u uid=%u\n",
+ "%s 0x%llx %u: insufficient priv "
+ "from pid=%u tgid=%u uid=%u\n",
ppi->iface_entry->ifname,
get_atag_from_tag(tag), stat_uid,
- current->pid, current_fsuid());
+ current->pid, current->tgid, current_fsuid());
return 0;
}
if (ppi->item_index++ < ppi->items_to_skip)
if (unlikely(module_passive)) {
len = pp_stats_line(&ppi, 0);
/* The header should always be shorter than the buffer. */
- WARN_ON(len >= ppi.char_count);
+ BUG_ON(len >= ppi.char_count);
(*num_items_returned)++;
*eof = 1;
return len;
}
/*------------------------------------------*/
+static int qtudev_open(struct inode *inode, struct file *file)
+{
+ struct uid_tag_data *utd_entry;
+ struct proc_qtu_data *pqd_entry;
+	struct proc_qtu_data *new_pqd_entry = NULL;
+ int res;
+ bool utd_entry_found;
+
+ if (unlikely(qtu_proc_handling_passive))
+ return 0;
+
+ DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
+ current->pid, current->tgid, current_fsuid());
+
+ spin_lock_bh(&uid_tag_data_tree_lock);
+
+ /* Look for existing uid data, or alloc one. */
+ utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
+ if (IS_ERR_OR_NULL(utd_entry)) {
+ res = PTR_ERR(utd_entry);
+		spin_unlock_bh(&uid_tag_data_tree_lock);
+		goto err;
+ }
+
+ /* Look for existing PID based proc_data */
+ pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
+ current->tgid);
+ if (pqd_entry) {
+ pr_err("qtaguid: qtudev_open(): %u/%u %u "
+ "%s already opened\n",
+ current->pid, current->tgid, current_fsuid(),
+ QTU_DEV_NAME);
+ res = -EBUSY;
+ goto err_unlock_free_utd;
+ }
+
+ new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
+ if (!new_pqd_entry) {
+ pr_err("qtaguid: qtudev_open(): %u/%u %u: "
+ "proc data alloc failed\n",
+ current->pid, current->tgid, current_fsuid());
+ res = -ENOMEM;
+ goto err_unlock_free_utd;
+ }
+ new_pqd_entry->pid = current->tgid;
+ INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
+ new_pqd_entry->parent_tag_data = utd_entry;
+
+ proc_qtu_data_tree_insert(new_pqd_entry,
+ &proc_qtu_data_tree);
+
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+ DR_DEBUG("qtaguid: tracking data for uid=%u\n", current_fsuid());
+ file->private_data = new_pqd_entry;
+ return 0;
+
+err_unlock_free_utd:
+ if (!utd_entry_found) {
+ rb_erase(&utd_entry->node, &uid_tag_data_tree);
+ kfree(utd_entry);
+ }
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+err:
+ return res;
+}
+
+static int qtudev_release(struct inode *inode, struct file *file)
+{
+ struct proc_qtu_data *pqd_entry = file->private_data;
+ struct uid_tag_data *utd_entry = pqd_entry->parent_tag_data;
+ struct sock_tag *st_entry;
+ struct rb_root st_to_free_tree = RB_ROOT;
+ struct list_head *entry, *next;
+ struct tag_ref *tr;
+
+ if (unlikely(qtu_proc_handling_passive))
+ return 0;
+
+ /*
+ * Do not trust the current->pid, it might just be a kworker cleaning
+	 * Do not trust current->pid; it might just be a kworker cleaning
+ */
+ DR_DEBUG("qtaguid: qtudev_release(): "
+ "pid=%u tgid=%u uid=%u "
+ "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
+ current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
+ pqd_entry, pqd_entry->pid, utd_entry,
+ utd_entry->num_active_tags);
+
+ spin_lock_bh(&sock_tag_list_lock);
+ spin_lock_bh(&uid_tag_data_tree_lock);
+
+ /*
+	 * If this proc didn't actually tag anything for itself, or has
+	 * already willingly cleaned up after itself, its utd_entry holds no
+	 * tag_refs and this put will free it.
+ */
+ put_utd_entry(utd_entry);
+
+ list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
+ st_entry = list_entry(entry, struct sock_tag, list);
+ DR_DEBUG("qtaguid: %s(): "
+ "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
+ __func__,
+ st_entry, st_entry->sk,
+ current->pid, current->tgid,
+ pqd_entry->parent_tag_data->uid);
+
+ utd_entry = uid_tag_data_tree_search(
+ &uid_tag_data_tree,
+ get_uid_from_tag(st_entry->tag));
+ BUG_ON(IS_ERR_OR_NULL(utd_entry));
+ DR_DEBUG("qtaguid: %s(): "
+ "looking for tag=0x%llx in utd_entry=%p\n", __func__,
+ st_entry->tag, utd_entry);
+ tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
+ st_entry->tag);
+ BUG_ON(!tr);
+ BUG_ON(tr->num_sock_tags <= 0);
+ tr->num_sock_tags--;
+ free_tag_ref_from_utd_entry(tr, utd_entry);
+
+ rb_erase(&st_entry->sock_node, &sock_tag_tree);
+ list_del(&st_entry->list);
+ /* Can't sockfd_put() within spinlock, do it later. */
+ sock_tag_tree_insert(st_entry, &st_to_free_tree);
+
+		/* Do not put_utd_entry(): this may be someone else's utd_entry. */
+ }
+
+ rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
+ kfree(pqd_entry);
+ file->private_data = NULL;
+
+ spin_unlock_bh(&uid_tag_data_tree_lock);
+ spin_unlock_bh(&sock_tag_list_lock);
+
+	sock_tag_tree_erase(&st_to_free_tree);
+
+ return 0;
+}
+
+/*------------------------------------------*/
+static const struct file_operations qtudev_fops = {
+ .owner = THIS_MODULE,
+ .open = qtudev_open,
+ .release = qtudev_release,
+};
+
+static struct miscdevice qtu_device = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = QTU_DEV_NAME,
+ .fops = &qtudev_fops,
+ /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
+};
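From userspace, the intended lifecycle is: open the misc device so that
qtudev_release() can later clean up the process's socket tags, then drive
tagging through the ctrl proc file. A hypothetical sketch; the /dev and /proc
paths and the 't' command letter are assumptions inferred from QTU_DEV_NAME
and ctrl_tag's "%c %d %llu %u" parse, not spelled out in this hunk:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	/* Tag sock_fd with acct_tag on behalf of uid; 0 on success. */
	static int qtaguid_tag(int sock_fd, unsigned long long acct_tag,
			       unsigned uid)
	{
		char cmd[64];
		int ctrl, res;
		/* Registering lets qtudev_release() untag our sockets later. */
		int qtu = open("/dev/xt_qtaguid", O_RDONLY);

		if (qtu < 0)
			return -1;
		ctrl = open("/proc/net/xt_qtaguid/ctrl", O_WRONLY);
		if (ctrl < 0)
			return -1;
		/* acct_tag must leave the low 32 bits clear, see valid_atag(). */
		snprintf(cmd, sizeof(cmd), "t %d %llu %u",
			 sock_fd, acct_tag, uid);
		res = write(ctrl, cmd, strlen(cmd)) < 0 ? -1 : 0;
		close(ctrl);
		/* Keep qtu open for the life of the process. */
		return res;
	}

When the process exits (or explicitly closes the qtu fd), qtudev_release()
erases any sock_tags still on its sock_tag_list.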
+
+/*------------------------------------------*/
static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
{
int ret;
{
if (qtaguid_proc_register(&xt_qtaguid_procdir)
|| iface_stat_init(xt_qtaguid_procdir)
- || xt_register_match(&qtaguid_mt_reg))
+ || xt_register_match(&qtaguid_mt_reg)
+ || misc_register(&qtu_device))
return -1;
return 0;
}
--- /dev/null
+/*
+ * Pretty printing Support for iptables xt_qtaguid module.
+ *
+ * (C) 2011 Google, Inc
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Run-time debug flags are enabled via the debug_mask module param, falling
+ * back to DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
+ */
+#define DEBUG
+
+#include <linux/fs.h>
+#include <linux/gfp.h>
+#include <linux/net.h>
+#include <linux/rbtree.h>
+#include <linux/slab.h>
+#include <linux/spinlock_types.h>
+
+
+#include "xt_qtaguid_internal.h"
+#include "xt_qtaguid_print.h"
+
+char *pp_tag_t(tag_t *tag)
+{
+ if (!tag)
+ return kasprintf(GFP_ATOMIC, "tag_t@null{}");
+ return kasprintf(GFP_ATOMIC,
+ "tag_t@%p{tag=0x%llx, uid=%u}",
+ tag, *tag, get_uid_from_tag(*tag));
+}
+
+char *pp_data_counters(struct data_counters *dc, bool showValues)
+{
+ if (!dc)
+ return kasprintf(GFP_ATOMIC, "data_counters@null{}");
+ if (showValues)
+ return kasprintf(
+ GFP_ATOMIC, "data_counters@%p{"
+ "set0{"
+ "rx{"
+ "tcp{b=%llu, p=%llu}, "
+ "udp{b=%llu, p=%llu},"
+ "other{b=%llu, p=%llu}}, "
+ "tx{"
+ "tcp{b=%llu, p=%llu}, "
+ "udp{b=%llu, p=%llu},"
+ "other{b=%llu, p=%llu}}}, "
+ "set1{"
+ "rx{"
+ "tcp{b=%llu, p=%llu}, "
+ "udp{b=%llu, p=%llu},"
+ "other{b=%llu, p=%llu}}, "
+ "tx{"
+ "tcp{b=%llu, p=%llu}, "
+ "udp{b=%llu, p=%llu},"
+ "other{b=%llu, p=%llu}}}}",
+ dc,
+ dc->bpc[0][IFS_RX][IFS_TCP].bytes,
+ dc->bpc[0][IFS_RX][IFS_TCP].packets,
+ dc->bpc[0][IFS_RX][IFS_UDP].bytes,
+ dc->bpc[0][IFS_RX][IFS_UDP].packets,
+ dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].bytes,
+ dc->bpc[0][IFS_RX][IFS_PROTO_OTHER].packets,
+ dc->bpc[0][IFS_TX][IFS_TCP].bytes,
+ dc->bpc[0][IFS_TX][IFS_TCP].packets,
+ dc->bpc[0][IFS_TX][IFS_UDP].bytes,
+ dc->bpc[0][IFS_TX][IFS_UDP].packets,
+ dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].bytes,
+ dc->bpc[0][IFS_TX][IFS_PROTO_OTHER].packets,
+ dc->bpc[1][IFS_RX][IFS_TCP].bytes,
+ dc->bpc[1][IFS_RX][IFS_TCP].packets,
+ dc->bpc[1][IFS_RX][IFS_UDP].bytes,
+ dc->bpc[1][IFS_RX][IFS_UDP].packets,
+ dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].bytes,
+ dc->bpc[1][IFS_RX][IFS_PROTO_OTHER].packets,
+ dc->bpc[1][IFS_TX][IFS_TCP].bytes,
+ dc->bpc[1][IFS_TX][IFS_TCP].packets,
+ dc->bpc[1][IFS_TX][IFS_UDP].bytes,
+ dc->bpc[1][IFS_TX][IFS_UDP].packets,
+ dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].bytes,
+ dc->bpc[1][IFS_TX][IFS_PROTO_OTHER].packets);
+ else
+ return kasprintf(GFP_ATOMIC, "data_counters@%p{...}", dc);
+}
+
+char *pp_tag_node(struct tag_node *tn)
+{
+ char *tag_str;
+ char *res;
+
+ if (!tn)
+ return kasprintf(GFP_ATOMIC, "tag_node@null{}");
+ tag_str = pp_tag_t(&tn->tag);
+ res = kasprintf(GFP_ATOMIC,
+ "tag_node@%p{tag=%s}",
+ tn, tag_str);
+ kfree(tag_str);
+ return res;
+}
+
+char *pp_tag_ref(struct tag_ref *tr)
+{
+ char *tn_str;
+ char *res;
+
+ if (!tr)
+ return kasprintf(GFP_ATOMIC, "tag_ref@null{}");
+ tn_str = pp_tag_node(&tr->tn);
+ res = kasprintf(GFP_ATOMIC,
+ "tag_ref@%p{%s, num_sock_tags=%d}",
+ tr, tn_str, tr->num_sock_tags);
+ kfree(tn_str);
+ return res;
+}
+
+char *pp_tag_stat(struct tag_stat *ts)
+{
+ char *tn_str;
+ char *counters_str;
+ char *parent_counters_str;
+ char *res;
+
+ if (!ts)
+ return kasprintf(GFP_ATOMIC, "tag_stat@null{}");
+ tn_str = pp_tag_node(&ts->tn);
+ counters_str = pp_data_counters(&ts->counters, true);
+ parent_counters_str = pp_data_counters(ts->parent_counters, false);
+ res = kasprintf(GFP_ATOMIC,
+ "tag_stat@%p{%s, counters=%s, parent_counters=%s}",
+ ts, tn_str, counters_str, parent_counters_str);
+ kfree(tn_str);
+ kfree(counters_str);
+ kfree(parent_counters_str);
+ return res;
+}
+
+char *pp_iface_stat(struct iface_stat *is)
+{
+ if (!is)
+ return kasprintf(GFP_ATOMIC, "iface_stat@null{}");
+ return kasprintf(GFP_ATOMIC, "iface_stat@%p{"
+ "list=list_head{...}, "
+ "ifname=%s, "
+ "rx_bytes=%llu, "
+ "rx_packets=%llu, "
+ "tx_bytes=%llu, "
+ "tx_packets=%llu, "
+ "active=%d, "
+ "proc_ptr=%p, "
+ "tag_stat_tree=rb_root{...}}",
+ is,
+ is->ifname,
+ is->rx_bytes,
+ is->rx_packets,
+ is->tx_bytes,
+ is->tx_packets,
+ is->active,
+ is->proc_ptr);
+}
+
+char *pp_sock_tag(struct sock_tag *st)
+{
+ char *tag_str;
+ char *res;
+
+ if (!st)
+ return kasprintf(GFP_ATOMIC, "sock_tag@null{}");
+ tag_str = pp_tag_t(&st->tag);
+ res = kasprintf(GFP_ATOMIC, "sock_tag@%p{"
+ "sock_node=rb_node{...}, "
+ "sk=%p socket=%p (f_count=%lu), list=list_head{...}, "
+ "pid=%u, tag=%s}",
+ st, st->sk, st->socket, atomic_long_read(
+ &st->socket->file->f_count),
+ st->pid, tag_str);
+ kfree(tag_str);
+ return res;
+}
+
+char *pp_uid_tag_data(struct uid_tag_data *utd)
+{
+ char *res;
+
+ if (!utd)
+ return kasprintf(GFP_ATOMIC, "uid_tag_data@null{}");
+ res = kasprintf(GFP_ATOMIC, "uid_tag_data@%p{"
+ "uid=%u, num_active_acct_tags=%d, "
+ "tag_node_tree=rb_root{...}, "
+ "proc_qtu_data_tree=rb_root{...}}",
+ utd, utd->uid,
+ utd->num_active_tags);
+ return res;
+}
+
+char *pp_proc_qtu_data(struct proc_qtu_data *pqd)
+{
+ char *parent_tag_data_str;
+ char *res;
+
+ if (!pqd)
+ return kasprintf(GFP_ATOMIC, "proc_qtu_data@null{}");
+ parent_tag_data_str = pp_uid_tag_data(pqd->parent_tag_data);
+ res = kasprintf(GFP_ATOMIC, "proc_qtu_data@%p{"
+ "node=rb_node{...}, pid=%u, "
+ "parent_tag_data=%s, "
+ "sock_tag_list=list_head{...}}",
+ pqd, pqd->pid, parent_tag_data_str
+ );
+ kfree(parent_tag_data_str);
+ return res;
+}
+
+/*------------------------------------------*/
+void prdebug_sock_tag_tree(int indent_level,
+ struct rb_root *sock_tag_tree)
+{
+ struct rb_node *node;
+ struct sock_tag *sock_tag_entry;
+ char *str;
+
+ str = "sock_tag_tree=rb_root{";
+ CT_DEBUG("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ for (node = rb_first(sock_tag_tree);
+ node;
+ node = rb_next(node)) {
+ sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
+ str = pp_sock_tag(sock_tag_entry);
+ CT_DEBUG("%*d: %s,\n", indent_level*2, indent_level, str);
+ kfree(str);
+ }
+ indent_level--;
+ str = "}";
+ CT_DEBUG("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_sock_tag_list(int indent_level,
+ struct list_head *sock_tag_list)
+{
+ struct sock_tag *sock_tag_entry;
+ char *str;
+
+ str = "sock_tag_list=list_head{";
+ CT_DEBUG("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ list_for_each_entry(sock_tag_entry, sock_tag_list, list) {
+ str = pp_sock_tag(sock_tag_entry);
+ CT_DEBUG("%*d: %s,\n", indent_level*2, indent_level, str);
+ kfree(str);
+ }
+ indent_level--;
+ str = "}";
+ CT_DEBUG("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_proc_qtu_data_tree(int indent_level,
+ struct rb_root *proc_qtu_data_tree)
+{
+ char *str;
+ struct rb_node *node;
+ struct proc_qtu_data *proc_qtu_data_entry;
+
+ str = "proc_qtu_data_tree=rb_root{";
+ CT_DEBUG("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ for (node = rb_first(proc_qtu_data_tree);
+ node;
+ node = rb_next(node)) {
+ proc_qtu_data_entry = rb_entry(node,
+ struct proc_qtu_data,
+ node);
+ str = pp_proc_qtu_data(proc_qtu_data_entry);
+ CT_DEBUG("%*d: %s,\n", indent_level*2, indent_level,
+ str);
+ kfree(str);
+ indent_level++;
+ prdebug_sock_tag_list(indent_level,
+ &proc_qtu_data_entry->sock_tag_list);
+ indent_level--;
+	}
+ indent_level--;
+ str = "}";
+ CT_DEBUG("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_tag_ref_tree(int indent_level, struct rb_root *tag_ref_tree)
+{
+ char *str;
+ struct rb_node *node;
+ struct tag_ref *tag_ref_entry;
+
+ str = "tag_ref_tree{";
+ CT_DEBUG("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ for (node = rb_first(tag_ref_tree);
+ node;
+ node = rb_next(node)) {
+ tag_ref_entry = rb_entry(node,
+ struct tag_ref,
+ tn.node);
+ str = pp_tag_ref(tag_ref_entry);
+ CT_DEBUG("%*d: %s,\n", indent_level*2, indent_level,
+ str);
+ kfree(str);
+ }
+ indent_level--;
+ str = "}";
+ CT_DEBUG("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_uid_tag_data_tree(int indent_level,
+ struct rb_root *uid_tag_data_tree)
+{
+ char *str;
+ struct rb_node *node;
+ struct uid_tag_data *uid_tag_data_entry;
+
+ str = "uid_tag_data_tree=rb_root{";
+ CT_DEBUG("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ for (node = rb_first(uid_tag_data_tree);
+ node;
+ node = rb_next(node)) {
+ uid_tag_data_entry = rb_entry(node, struct uid_tag_data,
+ node);
+ str = pp_uid_tag_data(uid_tag_data_entry);
+ CT_DEBUG("%*d: %s,\n", indent_level*2, indent_level, str);
+ kfree(str);
+ if (!RB_EMPTY_ROOT(&uid_tag_data_entry->tag_ref_tree)) {
+ indent_level++;
+ prdebug_tag_ref_tree(indent_level,
+ &uid_tag_data_entry->tag_ref_tree);
+ indent_level--;
+ }
+ }
+ indent_level--;
+ str = "}";
+ CT_DEBUG("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_tag_stat_tree(int indent_level,
+ struct rb_root *tag_stat_tree)
+{
+ char *str;
+ struct rb_node *node;
+ struct tag_stat *ts_entry;
+
+ str = "tag_stat_tree{";
+ CT_DEBUG("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ for (node = rb_first(tag_stat_tree);
+ node;
+ node = rb_next(node)) {
+ ts_entry = rb_entry(node, struct tag_stat, tn.node);
+ str = pp_tag_stat(ts_entry);
+ CT_DEBUG("%*d: %s\n", indent_level*2, indent_level,
+ str);
+ kfree(str);
+ }
+ indent_level--;
+ str = "}";
+ CT_DEBUG("%*d: %s\n", indent_level*2, indent_level, str);
+}
+
+void prdebug_iface_stat_list(int indent_level,
+ struct list_head *iface_stat_list)
+{
+ char *str;
+ struct iface_stat *iface_entry;
+
+ str = "iface_stat_list=list_head{";
+ CT_DEBUG("%*d: %s\n", indent_level*2, indent_level, str);
+ indent_level++;
+ list_for_each_entry(iface_entry, iface_stat_list, list) {
+ str = pp_iface_stat(iface_entry);
+ CT_DEBUG("%*d: %s\n", indent_level*2, indent_level, str);
+ kfree(str);
+
+ spin_lock_bh(&iface_entry->tag_stat_list_lock);
+ if (!RB_EMPTY_ROOT(&iface_entry->tag_stat_tree)) {
+ indent_level++;
+ prdebug_tag_stat_tree(indent_level,
+ &iface_entry->tag_stat_tree);
+ indent_level--;
+ }
+ spin_unlock_bh(&iface_entry->tag_stat_list_lock);
+ }
+ indent_level--;
+ str = "}";
+ CT_DEBUG("%*d: %s\n", indent_level*2, indent_level, str);
+}