drm/amdkfd: Enable per process SMI event
author Philip Yang <Philip.Yang@amd.com>
Thu, 13 Jan 2022 23:59:02 +0000 (18:59 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Thu, 30 Jun 2022 19:30:47 +0000 (15:30 -0400)
By default, a process receives events only from itself. Add a flag that
allows a process to receive events from all processes; using this flag
requires superuser permission.

Events sent with pid 0 are delivered to all processes, which keeps the
default behavior of the existing SMI events.

Signed-off-by: Philip Yang <Philip.Yang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c

index f2e1d50..55ed026 100644 (file)
@@ -38,6 +38,8 @@ struct kfd_smi_client {
        uint64_t events;
        struct kfd_dev *dev;
        spinlock_t lock;
+       pid_t pid;
+       bool suser;
 };
 
 #define MAX_KFIFO_SIZE 1024
@@ -151,16 +153,27 @@ static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
        return 0;
 }
 
-static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
-                             char *event_msg, int len)
+static bool kfd_smi_ev_enabled(pid_t pid, struct kfd_smi_client *client,
+                              unsigned int event)
+{
+       uint64_t all = KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS);
+       uint64_t events = READ_ONCE(client->events);
+
+       if (pid && client->pid != pid && !(client->suser && (events & all)))
+               return false;
+
+       return events & KFD_SMI_EVENT_MASK_FROM_INDEX(event);
+}
+
+static void add_event_to_kfifo(pid_t pid, struct kfd_dev *dev,
+                              unsigned int smi_event, char *event_msg, int len)
 {
        struct kfd_smi_client *client;
 
        rcu_read_lock();
 
        list_for_each_entry_rcu(client, &dev->smi_clients, list) {
-               if (!(READ_ONCE(client->events) &
-                               KFD_SMI_EVENT_MASK_FROM_INDEX(smi_event)))
+               if (!kfd_smi_ev_enabled(pid, client, smi_event))
                        continue;
                spin_lock(&client->lock);
                if (kfifo_avail(&client->fifo) >= len) {
@@ -176,9 +189,9 @@ static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
        rcu_read_unlock();
 }
 
-__printf(3, 4)
-static void kfd_smi_event_add(struct kfd_dev *dev, unsigned int event,
-                             char *fmt, ...)
+__printf(4, 5)
+static void kfd_smi_event_add(pid_t pid, struct kfd_dev *dev,
+                             unsigned int event, char *fmt, ...)
 {
        char fifo_in[KFD_SMI_EVENT_MSG_SIZE];
        int len;
@@ -193,7 +206,7 @@ static void kfd_smi_event_add(struct kfd_dev *dev, unsigned int event,
        len += vsnprintf(fifo_in + len, sizeof(fifo_in) - len, fmt, args);
        va_end(args);
 
-       add_event_to_kfifo(dev, event, fifo_in, len);
+       add_event_to_kfifo(pid, dev, event, fifo_in, len);
 }
 
 void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
@@ -206,13 +219,13 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
                event = KFD_SMI_EVENT_GPU_PRE_RESET;
                ++(dev->reset_seq_num);
        }
-       kfd_smi_event_add(dev, event, "%x\n", dev->reset_seq_num);
+       kfd_smi_event_add(0, dev, event, "%x\n", dev->reset_seq_num);
 }
 
 void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
                                             uint64_t throttle_bitmask)
 {
-       kfd_smi_event_add(dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n",
+       kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n",
                          throttle_bitmask,
                          amdgpu_dpm_get_thermal_throttling_counter(dev->adev));
 }
@@ -227,7 +240,7 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
        if (!task_info.pid)
                return;
 
-       kfd_smi_event_add(dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n",
+       kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n",
                          task_info.pid, task_info.task_name);
 }
 
@@ -251,6 +264,8 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
        spin_lock_init(&client->lock);
        client->events = 0;
        client->dev = dev;
+       client->pid = current->tgid;
+       client->suser = capable(CAP_SYS_ADMIN);
 
        spin_lock(&dev->smi_lock);
        list_add_rcu(&client->list, &dev->smi_clients);