Skip to content

Commit

Permalink
[PATCH, v9] cgroups: introduce timer slack controller
Browse files Browse the repository at this point in the history
  • Loading branch information
boype authored and AirOne70 committed Oct 2, 2017
1 parent 9bb4ac5 commit 4ea3b93
Show file tree
Hide file tree
Showing 13 changed files with 247 additions and 8 deletions.
72 changes: 72 additions & 0 deletions Documentation/cgroups/timer_slack.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
Timer Slack Controller
======================

Overview
--------

Every task_struct has a timer_slack_ns value. This value is used to round
up poll() and select() timeout values. This feature can be useful in
mobile environments where combined wakeups are desired.

Originally, prctl() was the only way to change the timer slack value of
a process, so it was not possible to change the timer slack value of
another process.

The cgroup subsystem "timer_slack" implements the timer slack controller.
It provides a way to set a minimal timer slack value for a group of tasks.
If a task belongs to a cgroup whose minimal timer slack value is higher
than the task's own value, the cgroup's value is applied.

The timer slack controller makes it possible to set the timer slack value
of a process based on a policy. For example, you can create foreground and
background cgroups and move tasks between them based on system state.

User interface
--------------

To get timer slack controller functionality you need to enable it in
kernel configuration:

CONFIG_CGROUP_TIMER_SLACK=y

The controller provides two files:

# mount -t cgroup -o timer_slack none /sys/fs/cgroup
# ls /sys/fs/cgroup/timer_slack.*
/sys/fs/cgroup/timer_slack.effective_slack_ns
/sys/fs/cgroup/timer_slack.min_slack_ns

By default timer_slack.min_slack_ns is 0:

# cat /sys/fs/cgroup/timer_slack.min_slack_ns
0

You can set it to some value:

# echo 50000 > /sys/fs/cgroup/timer_slack.min_slack_ns
# cat /sys/fs/cgroup/timer_slack.min_slack_ns
50000

Tasks can still set their own value below 50000 using prctl(), but in
that case the cgroup's value will be applied.

Timer slack controller supports hierarchical groups.

# mkdir /sys/fs/cgroup/a
# cat /sys/fs/cgroup/a/timer_slack.min_slack_ns
50000
# echo 70000 > /sys/fs/cgroup/a/timer_slack.min_slack_ns
# cat /sys/fs/cgroup/a/timer_slack.min_slack_ns
70000

You can set any value you want, but the effective value will be the highest
value found going up the hierarchy. You can see the effective timer slack
value for the cgroup in the timer_slack.effective_slack_ns file:

# cat /sys/fs/cgroup/a/timer_slack.effective_slack_ns
70000
# echo 100000 > /sys/fs/cgroup/timer_slack.min_slack_ns
# cat /sys/fs/cgroup/a/timer_slack.min_slack_ns
70000
# cat /sys/fs/cgroup/a/timer_slack.effective_slack_ns
100000
1 change: 1 addition & 0 deletions arch/arm/configs/tegra3_android_defconfig
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ CONFIG_LOG_BUF_SHIFT=17
CONFIG_CGROUPS=y
CONFIG_CGROUP_DEBUG=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_TIMER_SLACK=y
# CONFIG_CGROUP_DEVICE is not set
# CONFIG_CPUSETS is not set
CONFIG_CGROUP_CPUACCT=y
Expand Down
7 changes: 2 additions & 5 deletions fs/select.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ static long __estimate_accuracy(struct timespec *tv)

long select_estimate_accuracy(struct timespec *tv)
{
unsigned long ret;
struct timespec now;

/*
Expand All @@ -81,10 +80,8 @@ long select_estimate_accuracy(struct timespec *tv)

ktime_get_ts(&now);
now = timespec_sub(*tv, now);
ret = __estimate_accuracy(&now);
if (ret < current->timer_slack_ns)
return current->timer_slack_ns;
return ret;
return min_t(long, __estimate_accuracy(&now),
task_get_effective_timer_slack(current));
}


Expand Down
12 changes: 12 additions & 0 deletions include/linux/cgroup_subsys.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,15 @@ SUBSYS(perf)
#endif

/* */

#ifdef CONFIG_CGROUP_BFQIO
SUBSYS(bfqio)
#endif

/* */

#ifdef CONFIG_CGROUP_TIMER_SLACK
SUBSYS(timer_slack)
#endif

/* */
6 changes: 6 additions & 0 deletions include/linux/prctl.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@

#define PR_MCE_KILL_GET 34

/*
 * Get the effective timer slack value for the calling process.
 * It can be higher than the value reported by PR_GET_TIMERSLACK.
 *
 * NOTE(review): confirm that option number 35 is not already used by
 * another PR_* option in this tree (later mainline kernels assign 35
 * to PR_SET_MM).
 */
#define PR_GET_EFFECTIVE_TIMERSLACK 35

/*
* If no_new_privs is set, then operations that grant new privileges (i.e.
* execve) will either fail or not grant them. This affects suid/sgid,
Expand Down
10 changes: 10 additions & 0 deletions include/linux/sched.h
Original file line number Diff line number Diff line change
Expand Up @@ -2682,6 +2682,16 @@ static inline unsigned long rlimit_max(unsigned int limit)
return task_rlimit_max(current, limit);
}

/*
 * task_get_effective_timer_slack() - timer slack to actually apply to @tsk.
 *
 * When the timer slack cgroup controller is not configured, this is simply
 * the task's own timer_slack_ns. Otherwise the out-of-line implementation
 * is declared here and defined elsewhere.
 */
#ifndef CONFIG_CGROUP_TIMER_SLACK
static inline unsigned long
task_get_effective_timer_slack(struct task_struct *tsk)
{
	return tsk->timer_slack_ns;
}
#else
extern unsigned long task_get_effective_timer_slack(struct task_struct *tsk);
#endif

#endif /* __KERNEL__ */

#endif
8 changes: 8 additions & 0 deletions init/Kconfig
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,14 @@ config CGROUP_FREEZER
Provides a way to freeze and unfreeze all tasks in a
cgroup.

config CGROUP_TIMER_SLACK
bool "Timer slack cgroup controller"
help
Provides a way to set minimal timer slack value for tasks in
a cgroup.
It's useful in mobile devices where certain background apps
are attached to a cgroup and combined wakeups are desired.

config CGROUP_DEVICE
bool "Device controller for cgroups"
help
Expand Down
1 change: 1 addition & 0 deletions kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ obj-$(CONFIG_BACKTRACE_SELF_TEST) += backtracetest.o
obj-$(CONFIG_COMPAT) += compat.o
obj-$(CONFIG_CGROUPS) += cgroup.o
obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
obj-$(CONFIG_CGROUP_TIMER_SLACK) += cgroup_timer_slack.o
obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_UTS_NS) += utsname.o
obj-$(CONFIG_USER_NS) += user_namespace.o
Expand Down
125 changes: 125 additions & 0 deletions kernel/cgroup_timer_slack.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
* cgroup_timer_slack.c - control group timer slack subsystem
*
* Copyright Nokia Corporation, 2011
* Author: Kirill A. Shutemov
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/cgroup.h>
#include <linux/slab.h>
#include <linux/err.h>

/* Tentative definition; the initialised definition is further down this file. */
struct cgroup_subsys timer_slack_subsys;
/*
 * Per-cgroup state of the timer slack controller.
 *
 * css:          embedded subsystem state; cgroup_to_tslack() recovers the
 *               enclosing structure from it with container_of()
 * min_slack_ns: minimal timer slack configured for this cgroup
 *               (nanoseconds, going by the _ns suffix)
 */
struct tslack_cgroup {
struct cgroup_subsys_state css;
unsigned long min_slack_ns;
};

/*
 * Map a cgroup to the timer slack state attached to it.
 *
 * Looks up the subsystem state stored for timer_slack_subsys on @cgroup
 * and returns the tslack_cgroup that embeds it.
 */
static struct tslack_cgroup *cgroup_to_tslack(struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup,
						timer_slack_subsys.subsys_id),
			    struct tslack_cgroup, css);
}

/*
 * Allocate the timer slack state for a new cgroup.
 *
 * A cgroup with a parent starts with the parent's min_slack_ns; a cgroup
 * without one starts at 0.
 *
 * Returns the embedded css on success, or ERR_PTR(-ENOMEM) if the
 * allocation fails.
 */
static struct cgroup_subsys_state *tslack_create(struct cgroup_subsys *subsys,
		struct cgroup *cgroup)
{
	struct tslack_cgroup *tslack;
	unsigned long initial_ns = 0UL;

	tslack = kmalloc(sizeof(*tslack), GFP_KERNEL);
	if (!tslack)
		return ERR_PTR(-ENOMEM);

	/* Inherit the parent's minimum when there is a parent. */
	if (cgroup->parent)
		initial_ns = cgroup_to_tslack(cgroup->parent)->min_slack_ns;
	tslack->min_slack_ns = initial_ns;

	return &tslack->css;
}

/*
 * Release the timer slack state that tslack_create() allocated for @cgroup.
 * @subsys is not used.
 */
static void tslack_destroy(struct cgroup_subsys *subsys,
		struct cgroup *cgroup)
{
	struct tslack_cgroup *tslack = cgroup_to_tslack(cgroup);

	kfree(tslack);
}

static u64 tslack_read_min(struct cgroup *cgroup, struct cftype *cft)
{
return cgroup_to_tslack(cgroup)->min_slack_ns;
}

/*
 * write_u64 handler for "min_slack_ns": store @val as the minimum for
 * @cgroup. @cft is not used.
 *
 * Returns 0 on success, or -EINVAL if @val does not fit in the
 * unsigned long that holds the setting.
 */
static int tslack_write_min(struct cgroup *cgroup, struct cftype *cft, u64 val)
{
	struct tslack_cgroup *tslack;

	/* min_slack_ns is an unsigned long; refuse values it cannot hold. */
	if (val > ULONG_MAX)
		return -EINVAL;

	tslack = cgroup_to_tslack(cgroup);
	tslack->min_slack_ns = val;
	return 0;
}

/*
 * read_u64 handler for "effective_slack_ns": report the largest
 * min_slack_ns found on @cgroup and all of its ancestors. @cft is not
 * used.
 */
static u64 tslack_read_effective(struct cgroup *cgroup, struct cftype *cft)
{
	unsigned long effective = cgroup_to_tslack(cgroup)->min_slack_ns;
	struct cgroup *ancestor;

	/* Walk towards the root, keeping the highest minimum seen so far. */
	for (ancestor = cgroup->parent; ancestor; ancestor = ancestor->parent) {
		unsigned long candidate;

		candidate = cgroup_to_tslack(ancestor)->min_slack_ns;
		if (candidate > effective)
			effective = candidate;
	}

	return effective;
}

/* Control files that tslack_populate() adds to a cgroup. */
static struct cftype files[] = {
	{
		/* Readable and writable: the cgroup's own minimum. */
		.name		= "min_slack_ns",
		.write_u64	= tslack_write_min,
		.read_u64	= tslack_read_min,
	},
	{
		/* Read-only: the highest minimum along the path to the root. */
		.name		= "effective_slack_ns",
		.read_u64	= tslack_read_effective,
	},
};

static int tslack_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
{
return cgroup_add_files(cgroup, subsys, files, ARRAY_SIZE(files));
}

/* Subsystem descriptor for the "timer_slack" cgroup controller. */
struct cgroup_subsys timer_slack_subsys = {
	.name		= "timer_slack",
	.subsys_id	= timer_slack_subsys_id,

	/* Callbacks, listed in lifecycle order. */
	.create		= tslack_create,
	.populate	= tslack_populate,
	.destroy	= tslack_destroy,
};

/*
 * Return the timer slack to apply to @tsk: the larger of the task's own
 * timer_slack_ns and the effective minimum of the timer_slack cgroup the
 * task belongs to.
 */
unsigned long task_get_effective_timer_slack(struct task_struct *tsk)
{
	unsigned long group_slack;
	unsigned long own_slack;

	/* The cgroup lookup and hierarchy walk run inside an RCU read section. */
	rcu_read_lock();
	group_slack = tslack_read_effective(
			task_cgroup(tsk, timer_slack_subsys.subsys_id), NULL);
	rcu_read_unlock();

	own_slack = tsk->timer_slack_ns;
	return own_slack > group_slack ? own_slack : group_slack;
}
4 changes: 4 additions & 0 deletions kernel/fork.c
Original file line number Diff line number Diff line change
Expand Up @@ -1167,6 +1167,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
memset(&p->rss_stat, 0, sizeof(p->rss_stat));
#endif

/*
* Save current task's (not effective) timer slack value as default
* timer slack value for new task.
*/
p->default_timer_slack_ns = current->timer_slack_ns;

task_io_accounting_init(&p->ioac);
Expand Down
4 changes: 2 additions & 2 deletions kernel/futex.c
Original file line number Diff line number Diff line change
Expand Up @@ -1997,7 +1997,7 @@ static int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val,
HRTIMER_MODE_ABS);
hrtimer_init_sleeper(to, current);
hrtimer_set_expires_range_ns(&to->timer, *abs_time,
current->timer_slack_ns);
task_get_effective_timer_slack(current));
}

retry:
Expand Down Expand Up @@ -2390,7 +2390,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
HRTIMER_MODE_ABS);
hrtimer_init_sleeper(to, current);
hrtimer_set_expires_range_ns(&to->timer, *abs_time,
current->timer_slack_ns);
task_get_effective_timer_slack(current));
}

/*
Expand Down
2 changes: 1 addition & 1 deletion kernel/hrtimer.c
Original file line number Diff line number Diff line change
Expand Up @@ -1574,7 +1574,7 @@ long hrtimer_nanosleep(struct timespec *rqtp, struct timespec __user *rmtp,
int ret = 0;
unsigned long slack;

slack = current->timer_slack_ns;
slack = task_get_effective_timer_slack(current);
if (rt_task(current))
slack = 0;

Expand Down
3 changes: 3 additions & 0 deletions kernel/sys.c
Original file line number Diff line number Diff line change
Expand Up @@ -1814,6 +1814,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_GET_TIMERSLACK:
error = current->timer_slack_ns;
break;
case PR_GET_EFFECTIVE_TIMERSLACK:
error = task_get_effective_timer_slack(current);
break;
case PR_SET_TIMERSLACK:
if (arg2 <= 0)
current->timer_slack_ns =
Expand Down

0 comments on commit 4ea3b93

Please sign in to comment.