Skip to content

Commit 2a6e7e3

Browse files
kerneltoast authored and vbajs committed
msm: kgsl: Dispatch commands using a master kthread
Instead of coordinating between a worker when dispatching commands and abusing a mutex lock for synchronization, it's faster to keep a single kthread dispatching commands whenever needed. This reduces GPU processing latency.

[@0ctobot: Adapted for msm-4.9, this reverts commit: 2eb74d7 ("msm: kgsl: Defer issue commands to worker thread")]

Change-Id: I5d3fd2dde07ff641feaf4fdc93e5d2b423bcb23d
Signed-off-by: Sultan Alsawaf <sultan@kerneltoast.com>
Signed-off-by: Adam W. Willis <return.of.octobot@gmail.com>
Signed-off-by: Richard Raya <rdxzv.dev@gmail.com>
Signed-off-by: Yahya Wessam <yahyawessam2002@gmail.com>
1 parent 2cd1213 commit 2a6e7e3

File tree

5 files changed

+53
-70
lines changed

5 files changed

+53
-70
lines changed

drivers/gpu/msm/adreno_dispatch.c

Lines changed: 46 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@
1717
#include <linux/jiffies.h>
1818
#include <linux/err.h>
1919

20+
#include <linux/version.h>
21+
/* The sched_param struct is located elsewhere in newer kernels */
22+
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 10, 0)
23+
#include <uapi/linux/sched/types.h>
24+
#endif
25+
2026
#include "kgsl.h"
2127
#include "kgsl_sharedmem.h"
2228
#include "adreno.h"
@@ -955,13 +961,6 @@ static void _adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev)
955961
spin_unlock(&dispatcher->plist_lock);
956962
}
957963

958-
static inline void _decrement_submit_now(struct kgsl_device *device)
959-
{
960-
spin_lock(&device->submit_lock);
961-
device->submit_now--;
962-
spin_unlock(&device->submit_lock);
963-
}
964-
965964
/**
966965
* adreno_dispatcher_issuecmds() - Issue commands from pending contexts
967966
* @adreno_dev: Pointer to the adreno device struct
@@ -970,30 +969,7 @@ static inline void _decrement_submit_now(struct kgsl_device *device)
970969
*/
971970
static void adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev)
972971
{
973-
struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
974-
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
975-
976-
spin_lock(&device->submit_lock);
977-
/* If state transition to SLUMBER, schedule the work for later */
978-
if (device->slumber == true) {
979-
spin_unlock(&device->submit_lock);
980-
goto done;
981-
}
982-
device->submit_now++;
983-
spin_unlock(&device->submit_lock);
984-
985-
/* If the dispatcher is busy then schedule the work for later */
986-
if (!mutex_trylock(&dispatcher->mutex)) {
987-
_decrement_submit_now(device);
988-
goto done;
989-
}
990-
991-
_adreno_dispatcher_issuecmds(adreno_dev);
992-
mutex_unlock(&dispatcher->mutex);
993-
_decrement_submit_now(device);
994-
return;
995-
done:
996-
adreno_dispatcher_schedule(device);
972+
adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
997973
}
998974

999975
/**
@@ -2473,12 +2449,9 @@ static void _dispatcher_power_down(struct adreno_device *adreno_dev)
24732449
mutex_unlock(&device->mutex);
24742450
}
24752451

2476-
static void adreno_dispatcher_work(struct kthread_work *work)
2452+
static void adreno_dispatcher_work(struct adreno_device *adreno_dev)
24772453
{
2478-
struct adreno_dispatcher *dispatcher =
2479-
container_of(work, struct adreno_dispatcher, work);
2480-
struct adreno_device *adreno_dev =
2481-
container_of(dispatcher, struct adreno_device, dispatcher);
2454+
struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
24822455
struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
24832456
struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
24842457
int count = 0;
@@ -2528,12 +2501,39 @@ static void adreno_dispatcher_work(struct kthread_work *work)
25282501
mutex_unlock(&dispatcher->mutex);
25292502
}
25302503

2504+
static int adreno_dispatcher_thread(void *data)
2505+
{
2506+
static const struct sched_param sched_rt_prio = {
2507+
.sched_priority = 16
2508+
};
2509+
struct adreno_device *adreno_dev = data;
2510+
struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
2511+
2512+
sched_setscheduler_nocheck(current, SCHED_FIFO, &sched_rt_prio);
2513+
2514+
while (1) {
2515+
bool should_stop;
2516+
2517+
wait_event(dispatcher->cmd_waitq,
2518+
(should_stop = kthread_should_stop()) ||
2519+
atomic_cmpxchg(&dispatcher->send_cmds, 1, 0));
2520+
2521+
if (should_stop)
2522+
break;
2523+
2524+
adreno_dispatcher_work(adreno_dev);
2525+
}
2526+
2527+
return 0;
2528+
}
2529+
25312530
void adreno_dispatcher_schedule(struct kgsl_device *device)
25322531
{
25332532
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
25342533
struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
25352534

2536-
kthread_queue_work(&kgsl_driver.worker, &dispatcher->work);
2535+
if (!atomic_cmpxchg(&dispatcher->send_cmds, 0, 1))
2536+
wake_up(&dispatcher->cmd_waitq);
25372537
}
25382538

25392539
/**
@@ -2653,6 +2653,8 @@ void adreno_dispatcher_close(struct adreno_device *adreno_dev)
26532653
int i;
26542654
struct adreno_ringbuffer *rb;
26552655

2656+
kthread_stop(dispatcher->thread);
2657+
26562658
mutex_lock(&dispatcher->mutex);
26572659
del_timer_sync(&dispatcher->timer);
26582660
del_timer_sync(&dispatcher->fault_timer);
@@ -2820,14 +2822,19 @@ int adreno_dispatcher_init(struct adreno_device *adreno_dev)
28202822
setup_timer(&dispatcher->fault_timer, adreno_dispatcher_fault_timer,
28212823
(unsigned long) adreno_dev);
28222824

2823-
kthread_init_work(&dispatcher->work, adreno_dispatcher_work);
2824-
28252825
init_completion(&dispatcher->idle_gate);
28262826
complete_all(&dispatcher->idle_gate);
28272827

28282828
plist_head_init(&dispatcher->pending);
28292829
spin_lock_init(&dispatcher->plist_lock);
28302830

2831+
init_waitqueue_head(&dispatcher->cmd_waitq);
2832+
dispatcher->send_cmds = (atomic_t)ATOMIC_INIT(0);
2833+
dispatcher->thread = kthread_run(adreno_dispatcher_thread, adreno_dev,
2834+
"adreno_dispatch");
2835+
if (IS_ERR(dispatcher->thread))
2836+
return PTR_ERR(dispatcher->thread);
2837+
28312838
ret = kobject_init_and_add(&dispatcher->kobj, &ktype_dispatcher,
28322839
&device->dev->kobj, "dispatch");
28332840

drivers/gpu/msm/adreno_dispatch.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,11 @@ struct adreno_dispatcher_drawqueue {
5353
* @fault: Non-zero if a fault was detected.
5454
* @pending: Priority list of contexts waiting to submit drawobjs
5555
* @plist_lock: Spin lock to protect the pending queue
56-
* @work: work_struct to put the dispatcher in a work queue
5756
* @kobj: kobject for the dispatcher directory in the device sysfs node
5857
* @idle_gate: Gate to wait on for dispatcher to idle
58+
* @thread: Kthread for the command dispatcher
59+
* @cmd_waitq: Waitqueue for the command dispatcher
60+
* @send_cmds: Atomic boolean indicating that commands should be dispatched
5961
*/
6062
struct adreno_dispatcher {
6163
struct mutex mutex;
@@ -66,9 +68,11 @@ struct adreno_dispatcher {
6668
atomic_t fault;
6769
struct plist_head pending;
6870
spinlock_t plist_lock;
69-
struct kthread_work work;
7071
struct kobject kobj;
7172
struct completion idle_gate;
73+
struct task_struct *thread;
74+
wait_queue_head_t cmd_waitq;
75+
atomic_t send_cmds;
7276
};
7377

7478
enum adreno_dispatcher_flags {

drivers/gpu/msm/kgsl.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5055,7 +5055,6 @@ int kgsl_device_platform_probe(struct kgsl_device *device)
50555055
device->id, device->reg_phys, device->reg_len);
50565056

50575057
rwlock_init(&device->context_lock);
5058-
spin_lock_init(&device->submit_lock);
50595058

50605059
setup_timer(&device->idle_timer, kgsl_timer, (unsigned long) device);
50615060

drivers/gpu/msm/kgsl_device.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -281,11 +281,6 @@ struct kgsl_device {
281281
struct kgsl_pwrctrl pwrctrl;
282282
int open_count;
283283

284-
/* For GPU inline submission */
285-
uint32_t submit_now;
286-
spinlock_t submit_lock;
287-
bool slumber;
288-
289284
struct mutex mutex;
290285
uint32_t state;
291286
uint32_t requested_state;

drivers/gpu/msm/kgsl_pwrctrl.c

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2442,24 +2442,9 @@ void kgsl_idle_check(struct work_struct *work)
24422442
|| device->state == KGSL_STATE_NAP)) {
24432443

24442444
if (!atomic_read(&device->active_cnt)) {
2445-
spin_lock(&device->submit_lock);
2446-
if (device->submit_now) {
2447-
spin_unlock(&device->submit_lock);
2448-
goto done;
2449-
}
2450-
/* Don't allow GPU inline submission in SLUMBER */
2451-
if (requested_state == KGSL_STATE_SLUMBER)
2452-
device->slumber = true;
2453-
spin_unlock(&device->submit_lock);
2454-
24552445
ret = kgsl_pwrctrl_change_state(device,
24562446
device->requested_state);
24572447
if (ret == -EBUSY) {
2458-
if (requested_state == KGSL_STATE_SLUMBER) {
2459-
spin_lock(&device->submit_lock);
2460-
device->slumber = false;
2461-
spin_unlock(&device->submit_lock);
2462-
}
24632448
/*
24642449
* If the GPU is currently busy, restore
24652450
* the requested state and reschedule
@@ -2470,7 +2455,7 @@ void kgsl_idle_check(struct work_struct *work)
24702455
kgsl_schedule_work(&device->idle_check_ws);
24712456
}
24722457
}
2473-
done:
2458+
24742459
if (!ret)
24752460
kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
24762461

@@ -2998,13 +2983,6 @@ static void kgsl_pwrctrl_set_state(struct kgsl_device *device,
29982983
{
29992984
device->state = state;
30002985
device->requested_state = KGSL_STATE_NONE;
3001-
3002-
spin_lock(&device->submit_lock);
3003-
if (state == KGSL_STATE_SLUMBER || state == KGSL_STATE_SUSPEND)
3004-
device->slumber = true;
3005-
else
3006-
device->slumber = false;
3007-
spin_unlock(&device->submit_lock);
30082986
}
30092987

30102988
static void kgsl_pwrctrl_request_state(struct kgsl_device *device,

0 commit comments

Comments
 (0)