diff options
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 12 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_process.c | 15 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 35 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 6 |
9 files changed, 69 insertions, 15 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index b25b41f50213..73bf8b5f2aa9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -336,7 +336,7 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo) } #endif /* KGD2KFD callbacks */ -int kgd2kfd_quiesce_mm(struct mm_struct *mm); +int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger); int kgd2kfd_resume_mm(struct mm_struct *mm); int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, struct dma_fence *fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 0036c9e405af..2fcc6e079769 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -32,6 +32,7 @@ #include "amdgpu_dma_buf.h" #include <uapi/linux/kfd_ioctl.h> #include "amdgpu_xgmi.h" +#include "kfd_smi_events.h" /* Userptr restore delay, just long enough to allow consecutive VM * changes to accumulate @@ -2346,7 +2347,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, evicted_bos = atomic_inc_return(&process_info->evicted_bos); if (evicted_bos == 1) { /* First eviction, stop the queues */ - r = kgd2kfd_quiesce_mm(mm); + r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_USERPTR); if (r) pr_err("Failed to quiesce KFD\n"); schedule_delayed_work(&process_info->restore_userptr_work, @@ -2620,13 +2621,16 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work) unlock_out: mutex_unlock(&process_info->lock); - mmput(mm); - put_task_struct(usertask); /* If validation failed, reschedule another attempt */ - if (evicted_bos) + if (evicted_bos) { schedule_delayed_work(&process_info->restore_userptr_work, msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS)); + + kfd_smi_event_queue_restore_rescheduled(mm); + } + mmput(mm); + put_task_struct(usertask); } /** amdgpu_amdkfd_gpuvm_restore_process_bos - Restore all BOs for the given diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index d07588230ed6..2b3d8bc8f0aa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2434,7 +2434,7 @@ static int criu_restore(struct file *filep, * Set the process to evicted state to avoid running any new queues before all the memory * mappings are ready. */ - ret = kfd_process_evict_queues(p); + ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_RESTORE); if (ret) goto exit_unlock; @@ -2553,7 +2553,7 @@ static int criu_process_info(struct file *filep, goto err_unlock; } - ret = kfd_process_evict_queues(p); + ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_CRIU_CHECKPOINT); if (ret) goto err_unlock; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index c8fee0dbfdcb..6ec0e9f0927d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -837,7 +837,7 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) spin_unlock_irqrestore(&kfd->interrupt_lock, flags); } -int kgd2kfd_quiesce_mm(struct mm_struct *mm) +int kgd2kfd_quiesce_mm(struct mm_struct *mm, uint32_t trigger) { struct kfd_process *p; int r; @@ -851,7 +851,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm) return -ESRCH; WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); - r = kfd_process_evict_queues(p); + r = kfd_process_evict_queues(p, trigger); kfd_unref_process(p); return r; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 4c4bbd493caa..d03a3b9c9c5d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -947,7 +947,7 @@ static inline struct kfd_process_device *kfd_process_device_from_gpuidx( } void kfd_unref_process(struct kfd_process *p); -int kfd_process_evict_queues(struct kfd_process *p); +int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger); int kfd_process_restore_queues(struct kfd_process *p); void kfd_suspend_all_processes(void); int kfd_resume_all_processes(void); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index a13e60d48b73..fc38a4d81420 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -43,6 +43,7 @@ struct mm_struct; #include "kfd_device_queue_manager.h" #include "kfd_iommu.h" #include "kfd_svm.h" +#include "kfd_smi_events.h" /* * List of struct kfd_process (field kfd_process). @@ -1736,7 +1737,7 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm) * Eviction is reference-counted per process-device. This means multiple * evictions from different sources can be nested safely. */ -int kfd_process_evict_queues(struct kfd_process *p) +int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger) { int r = 0; int i; @@ -1745,6 +1746,9 @@ int kfd_process_evict_queues(struct kfd_process *p) for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *pdd = p->pdds[i]; + kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid, + trigger); + r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm, &pdd->qpd); /* evict return -EIO if HWS is hang or asic is resetting, in this case @@ -1769,6 +1773,9 @@ fail: if (n_evicted == 0) break; + + kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid); + if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, &pdd->qpd)) pr_err("Failed to restore queues\n"); @@ -1788,6 +1795,8 @@ int kfd_process_restore_queues(struct kfd_process *p) for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *pdd = p->pdds[i]; + kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid); + r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, &pdd->qpd); if (r) { @@ -1849,7 +1858,7 @@ static void evict_process_worker(struct work_struct *work) flush_delayed_work(&p->restore_work); pr_debug("Started evicting pasid 0x%x\n", p->pasid); - ret = kfd_process_evict_queues(p); + ret = kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_TTM); if (!ret) { dma_fence_signal(p->ef); dma_fence_put(p->ef); @@ -1916,7 +1925,7 @@ void kfd_suspend_all_processes(void) cancel_delayed_work_sync(&p->eviction_work); cancel_delayed_work_sync(&p->restore_work); - if (kfd_process_evict_queues(p)) + if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND)) pr_err("Failed to suspend process 0x%x\n", p->pasid); dma_fence_signal(p->ef); dma_fence_put(p->ef); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index ec4d278c2a47..3917c38204d0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -283,6 +283,41 @@ void kfd_smi_event_migration_end(struct kfd_dev *dev, pid_t pid, from, to, trigger); } +void kfd_smi_event_queue_eviction(struct kfd_dev *dev, pid_t pid, + uint32_t trigger) +{ + kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_QUEUE_EVICTION, + "%lld -%d %x %d\n", ktime_get_boottime_ns(), pid, + dev->id, trigger); +} + +void kfd_smi_event_queue_restore(struct kfd_dev *dev, pid_t pid) +{ + kfd_smi_event_add(pid, dev, KFD_SMI_EVENT_QUEUE_RESTORE, + "%lld -%d %x\n", ktime_get_boottime_ns(), pid, + dev->id); +} + +void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm) +{ + struct kfd_process *p; + int i; + + p = kfd_lookup_process_by_mm(mm); + if (!p) + return; + + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; + + kfd_smi_event_add(p->lead_thread->pid, pdd->dev, + KFD_SMI_EVENT_QUEUE_RESTORE, + "%lld -%d %x %c\n", ktime_get_boottime_ns(), + p->lead_thread->pid, pdd->dev->id, 'R'); + } + kfd_unref_process(p); +} + int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd) { struct kfd_smi_client *client; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h index ec5d74a2fef4..b23292637239 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.h @@ -42,4 +42,8 @@ void kfd_smi_event_migration_start(struct kfd_dev *dev, pid_t pid, void kfd_smi_event_migration_end(struct kfd_dev *dev, pid_t pid, unsigned long start, unsigned long end, uint32_t from, uint32_t to, uint32_t trigger); +void kfd_smi_event_queue_eviction(struct kfd_dev *dev, pid_t pid, + uint32_t trigger); +void kfd_smi_event_queue_restore(struct kfd_dev *dev, pid_t pid); +void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm); #endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index e8ded7a02bcb..8bfb7b99e45d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1730,14 +1730,16 @@ out_reschedule: mutex_unlock(&svms->lock); mmap_write_unlock(mm); mutex_unlock(&process_info->lock); - mmput(mm); /* If validation failed, reschedule another attempt */ if (evicted_ranges) { pr_debug("reschedule to restore svm range\n"); schedule_delayed_work(&svms->restore_work, msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); + + kfd_smi_event_queue_restore_rescheduled(mm); } + mmput(mm); } /** @@ -1793,7 +1795,7 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, prange->svms, prange->start, prange->last); /* First eviction, stop the queues */ - r = kgd2kfd_quiesce_mm(mm); + r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_SVM); if (r) pr_debug("failed to quiesce KFD\n"); |