Linux内核之waitqueue机制
概要
当内核程序需要wait或sleep来等待某个event事件到来再被wakeup的时候,waitqueue是一种处理方法。
waitqueue的大体作用:
当某个进程wait for an event(比如等待数据),它会进入sleep。睡眠会让该进程suspend execution(挂起),让出CPU给其他任务。然后等待一段时间后,该等待的进程会在其等待的事件arrived后被wakeup唤醒,然后继续执行。不过使用waitqueue的时候需要注意避免race condition。
初始化waitqueue
代码头文件
#include <linux/wait.h>
两种方法初始化一个waitqueue:
static
/* Static method: declare a wait queue head named wq_name with this one macro. */
DECLARE_WAIT_QUEUE_HEAD(wq_name);
其中,wq_name就是waitqueue的名字;
dynamic
/* Dynamic method: declare the head first, then initialise it at run time. */
wait_queue_head_t wq_name;
init_waitqueue_head (&wq_name);
Queuing
定义和初始化好了waitqueue之后,内核提供了一系列宏定义,用于将process加入waitqueue。下面的宏定义都可以将process加入已定义好的waitqueue,然后process进入sleep去等待某事件的发生。
进程(任务)进入waitqueue的宏 | 进程(任务)在waitqueue中时的状态 |
---|---|
wait_event | TASK_UNINTERRUPTIBLE |
wait_event_timeout | TASK_UNINTERRUPTIBLE |
wait_event_cmd | TASK_UNINTERRUPTIBLE |
wait_event_interruptible | TASK_INTERRUPTIBLE |
wait_event_interruptible_timeout | TASK_INTERRUPTIBLE |
wait_event_killable | TASK_KILLABLE |
___wait_event
上述几个macro宏最终都是调用的**___wait_event()宏,区别就在于传参不同**。
/*
* The below macro ___wait_event() has an explicit shadow of the __ret
* variable when used from the wait_event_*() macros.
*
* This is so that both can use the ___wait_cond_timeout() construct
* to wrap the condition.
*
* The type inconsistency of the wait_event_*() __ret variable is also
* on purpose; we use long where we can return timeout values and int
* otherwise.
*/
/*
* ___wait_event - common sleep loop behind the wait_event_*() macros.
* @wq_head: wait queue head the entry is queued on (its address is taken)
* @condition: expression re-evaluated after every prepare_to_wait_event()
* @state: task state handed to prepare_to_wait_event()
* @exclusive: non-zero queues the entry with WQ_FLAG_EXCLUSIVE
* @ret: initial value of the local __ret, which is the macro's result
* @cmd: statement(s) executed on each iteration while @condition is false
*
* The loop ends through "break" once @condition is true, after which
* finish_wait() is called. If @state is interruptible and
* prepare_to_wait_event() returned non-zero, that value becomes the
* result and control jumps to __out, which skips finish_wait().
*/
#define ___wait_event(wq_head, condition, state, exclusive, ret, cmd) \
({ \
__label__ __out; \
struct wait_queue_entry __wq_entry; \
long __ret = ret; /* explicit shadow */ \
\
init_wait_entry(&__wq_entry, exclusive ? WQ_FLAG_EXCLUSIVE : 0); \
for (;;) { \
long __int = prepare_to_wait_event(&wq_head, &__wq_entry, state);\
\
if (condition) \
break; \
\
if (___wait_is_interruptible(state) && __int) { \
__ret = __int; \
goto __out; \
} \
\
cmd; \
} \
finish_wait(&wq_head, &__wq_entry); \
__out: __ret; \
})
wait_event
/*
* __wait_event - slow path of wait_event().
* Expands ___wait_event() with TASK_UNINTERRUPTIBLE, exclusive = 0,
* ret = 0 and schedule() as the sleep command; the result is discarded.
*/
#define __wait_event(wq_head, condition) \
(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
schedule())
/**
* wait_event - sleep until a condition gets true
* @wq_head: the waitqueue to wait on
* @condition: a C expression for the event to wait for
*
* The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
* @condition evaluates to true. The @condition is checked each time
* the waitqueue @wq_head is woken up.
*
* wake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*
* Note: @condition is evaluated once here as a fast path and again on
* every loop iteration inside ___wait_event(), so it should not have
* side effects.
*/
#define wait_event(wq_head, condition) \
do { \
might_sleep(); \
if (condition) \
break; \
__wait_event(wq_head, condition); \
} while (0)
wait_event_timeout
增加了超时时间,以jiffies为单位(1个jiffy等于1/HZ秒;例如CONFIG_HZ=1000时,1个jiffy就是1ms。实际使用时建议用msecs_to_jiffies()进行换算)。
/*
* __wait_event_timeout - slow path of wait_event_timeout().
* Passes @timeout as the initial __ret of ___wait_event() and uses
* "__ret = schedule_timeout(__ret)" as the sleep command, so the __ret
* assigned here is the one declared inside ___wait_event().
* ___wait_cond_timeout() is not shown in this excerpt.
*/
#define __wait_event_timeout(wq_head, condition, timeout) \
___wait_event(wq_head, ___wait_cond_timeout(condition), \
TASK_UNINTERRUPTIBLE, 0, timeout, \
__ret = schedule_timeout(__ret))
/**
* wait_event_timeout - sleep until a condition gets true or a timeout elapses
* @wq_head: the waitqueue to wait on
* @condition: a C expression for the event to wait for
* @timeout: timeout, in jiffies
*
* The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
* @condition evaluates to true. The @condition is checked each time
* the waitqueue @wq_head is woken up.
*
* wake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*
* Returns:
* 0 if the @condition evaluated to %false after the @timeout elapsed,
* 1 if the @condition evaluated to %true after the @timeout elapsed,
* or the remaining jiffies (at least 1) if the @condition evaluated
* to %true before the @timeout elapsed.
*/
#define wait_event_timeout(wq_head, condition, timeout) \
({ \
long __ret = timeout; \
might_sleep(); \
if (!___wait_cond_timeout(condition)) \
__ret = __wait_event_timeout(wq_head, condition, timeout); \
__ret; \
})
wait_event_cmd
/*
* __wait_event_cmd - slow path of wait_event_cmd().
* Same as __wait_event(), except that the sleep command is
* "cmd1; schedule(); cmd2" instead of a bare schedule().
*/
#define __wait_event_cmd(wq_head, condition, cmd1, cmd2) \
(void)___wait_event(wq_head, condition, TASK_UNINTERRUPTIBLE, 0, 0, \
cmd1; schedule(); cmd2)
/**
* wait_event_cmd - sleep until a condition gets true
* @wq_head: the waitqueue to wait on
* @condition: a C expression for the event to wait for
* @cmd1: the command will be executed before sleep
* @cmd2: the command will be executed after sleep
*
* The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
* @condition evaluates to true. The @condition is checked each time
* the waitqueue @wq_head is woken up.
*
* wake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*
* Note: unlike wait_event(), this excerpt does not call might_sleep()
* before the fast-path check of @condition.
*/
#define wait_event_cmd(wq_head, condition, cmd1, cmd2) \
do { \
if (condition) \
break; \
__wait_event_cmd(wq_head, condition, cmd1, cmd2); \
} while (0)
wait_event_interruptible
/*
* __wait_event_interruptible - slow path of wait_event_interruptible().
* Expands ___wait_event() with TASK_INTERRUPTIBLE, exclusive = 0, ret = 0
* and schedule() as the sleep command; its value is handed back to the
* caller.
*/
#define __wait_event_interruptible(wq_head, condition) \
___wait_event(wq_head, condition, TASK_INTERRUPTIBLE, 0, 0, \
schedule())
/**
* wait_event_interruptible - sleep until a condition gets true
* @wq_head: the waitqueue to wait on
* @condition: a C expression for the event to wait for
*
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
* @condition evaluates to true or a signal is received.
* The @condition is checked each time the waitqueue @wq_head is woken up.
*
* wake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*
* The function will return -ERESTARTSYS if it was interrupted by a
* signal and 0 if @condition evaluated to true.
*
* Note: the result is stored in an int __ret here, while ___wait_event()
* computes it in a long (see the comment above ___wait_event()).
*/
#define wait_event_interruptible(wq_head, condition) \
({ \
int __ret = 0; \
might_sleep(); \
if (!(condition)) \
__ret = __wait_event_interruptible(wq_head, condition); \
__ret; \
})
wait_event_interruptible_timeout
/*
* __wait_event_interruptible_timeout - slow path of
* wait_event_interruptible_timeout().
* Identical to __wait_event_timeout() except that the task sleeps in
* TASK_INTERRUPTIBLE.
*/
#define __wait_event_interruptible_timeout(wq_head, condition, timeout) \
___wait_event(wq_head, ___wait_cond_timeout(condition), \
TASK_INTERRUPTIBLE, 0, timeout, \
__ret = schedule_timeout(__ret))
/**
* wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses
* @wq_head: the waitqueue to wait on
* @condition: a C expression for the event to wait for
* @timeout: timeout, in jiffies
*
* The process is put to sleep (TASK_INTERRUPTIBLE) until the
* @condition evaluates to true or a signal is received.
* The @condition is checked each time the waitqueue @wq_head is woken up.
*
* wake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*
* Returns:
* 0 if the @condition evaluated to %false after the @timeout elapsed,
* 1 if the @condition evaluated to %true after the @timeout elapsed,
* the remaining jiffies (at least 1) if the @condition evaluated
* to %true before the @timeout elapsed, or -%ERESTARTSYS if it was
* interrupted by a signal.
*/
#define wait_event_interruptible_timeout(wq_head, condition, timeout) \
({ \
long __ret = timeout; \
might_sleep(); \
if (!___wait_cond_timeout(condition)) \
__ret = __wait_event_interruptible_timeout(wq_head, \
condition, timeout); \
__ret; \
})
wait_event_killable
/*
* __wait_event_killable - slow path of wait_event_killable().
* Expands ___wait_event() with TASK_KILLABLE, exclusive = 0, ret = 0 and
* schedule() as the sleep command. The first parameter is spelled "wq"
* here, whereas the public macro below calls it "wq_head".
*/
#define __wait_event_killable(wq, condition) \
___wait_event(wq, condition, TASK_KILLABLE, 0, 0, schedule())
/**
* wait_event_killable - sleep until a condition gets true
* @wq_head: the waitqueue to wait on
* @condition: a C expression for the event to wait for
*
* The process is put to sleep (TASK_KILLABLE) until the
* @condition evaluates to true or a signal is received.
* The @condition is checked each time the waitqueue @wq_head is woken up.
*
* wake_up() has to be called after changing any variable that could
* change the result of the wait condition.
*
* The function will return -ERESTARTSYS if it was interrupted by a
* signal and 0 if @condition evaluated to true.
*/
#define wait_event_killable(wq_head, condition) \
({ \
int __ret = 0; \
might_sleep(); \
if (!(condition)) \
__ret = __wait_event_killable(wq_head, condition); \
__ret; \
})
根据以上宏定义的展开代码看,wait_event_*()宏中:
- process进入wait后的state是不同的,有的uninterruptible,有的interruptible,有的killable;
- condition是在每次wq_head被唤醒时都会去检查;
唤醒Queued task
在waitqueue中的process可以用下面这些macros去唤醒wakeup。
唤醒的宏 |
---|
wake_up |
wake_up_all |
wake_up_interruptible |
wake_up_interruptible_sync |
__wake_up
/**
* __wake_up - wake up threads blocked on a waitqueue.
* @wq_head: the waitqueue
* @mode: which threads
* @nr_exclusive: how many wake-one or wake-many threads to wake up
* @key: is directly passed to the wakeup function
*
* If this function wakes up a task, it executes a full memory barrier before
* accessing the task state.
*/
void __wake_up(struct wait_queue_head *wq_head, unsigned int mode,
int nr_exclusive, void *key)
{
/*
* All arguments are forwarded unchanged; the literal 0 occupies the
* slot in which __wake_up_sync_key() passes WF_SYNC.
*/
__wake_up_common_lock(wq_head, mode, nr_exclusive, 0, key);
}
EXPORT_SYMBOL(__wake_up);
wake_up
/* wake_up - call __wake_up() with mode TASK_NORMAL, nr_exclusive = 1 and a NULL key. */
#define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL)
其中,x是&wq,waitqueue的地址,比如wake_up(&wq)。
wake_up_all
/*
* wake_up_all - call __wake_up() with mode TASK_NORMAL and nr_exclusive = 0.
* NOTE(review): 0 presumably means "no limit on exclusive waiters"; confirm
* against __wake_up_common(), which is not shown here.
*/
#define wake_up_all(x) __wake_up(x, TASK_NORMAL, 0, NULL)
wake_up_interruptible
/* wake_up_interruptible - like wake_up(), but with mode TASK_INTERRUPTIBLE instead of TASK_NORMAL. */
#define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
wake_up_interruptible_sync
/* Wake flag that __wake_up_sync_key() hands to __wake_up_common_lock(). */
#define WF_SYNC 0x10 /* Waker goes to sleep after wakeup */
/* Sync variant of wake_up_interruptible(): forwards to __wake_up_sync() with mode TASK_INTERRUPTIBLE. */
#define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE)
/*
* __wake_up_sync - see __wake_up_sync_key()
*
* Thin wrapper that calls __wake_up_sync_key() with a NULL key.
*/
void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode)
{
__wake_up_sync_key(wq_head, mode, NULL);
}
EXPORT_SYMBOL_GPL(__wake_up_sync); /* For internal use only */
/**
* __wake_up_sync_key - wake up threads blocked on a waitqueue.
* @wq_head: the waitqueue
* @mode: which threads
* @key: opaque value to be passed to wakeup targets
*
* The sync wakeup differs that the waker knows that it will schedule
* away soon, so while the target thread will be woken up, it will not
* be migrated to another CPU - ie. the two threads are 'synchronized'
* with each other. This can prevent needless bouncing between CPUs.
*
* On UP it can prevent extra preemption.
*
* If this function wakes up a task, it executes a full memory barrier before
* accessing the task state.
*/
void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode,
void *key)
{
/* A NULL wait queue head is tolerated and treated as "nothing to wake". */
if (unlikely(!wq_head))
return;
/* nr_exclusive is fixed at 1 and WF_SYNC is passed as the wake flag. */
__wake_up_common_lock(wq_head, mode, 1, WF_SYNC, key);
}
EXPORT_SYMBOL_GPL(__wake_up_sync_key);
wake_up调用会立即触发reschedule,这意味着其他的进程可能在wake_up返回前就开始运行了。所以在已知当前进程即将进入sleep状态时,可以通过sync的wake up调用(带WF_SYNC标志)来避免这次不必要的reschedule。
需要注意的是:被唤醒的进程可以立即在其他CPU上运行,所以这些宏并不能保证waker与wakee之间的互斥(二者并不一定争用同一个CPU)。
示例
static初始化方式
该模块功能:创建WaitThread内核线程,每当read读/dev/wq_device设备时,该线程被唤醒,进行read次数记录并打印。
其中,wait_queue_flag != 0
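作为condition条件,每次read时,wait_queue_flag都被置1,达到唤醒WaitThread的触发条件,并调用wake_up_interruptible(&wait_queue_test)宏来唤醒WaitThread线程。**注意:这里wake up只是指定了wait queue,并没有指定到具体的线程任务,那么,调用wake_up_interruptible(&wait_queue_test)是否会导致等待队列里的所有任务都去检查condition呢?**对于通过wait_event_*()加入队列的非exclusive等待者,答案是肯定的:wake_up_interruptible()会唤醒队列中所有处于TASK_INTERRUPTIBLE状态的非exclusive等待者,外加至多1个exclusive等待者(nr_exclusive = 1,参考wake_up_*系列的宏定义),被唤醒的任务各自重新检查condition。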
另外,当该module退出exit时,wait_queue_flag被置2,调用wake_up_interruptible(&wait_queue_test)去唤醒WaitThread执行exit操作。
#include <linux/atomic.h> //cmpxchg()
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/err.h> //IS_ERR(), PTR_ERR()
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kdev_t.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sched/task.h> //get_task_struct(), put_task_struct()
#include <linux/slab.h> //kmalloc()
#include <linux/uaccess.h> //copy_to/from_user()
#include <linux/wait.h> // Required for the wait queues
/*
** Module state. Everything is file-local (static) so that generic names
** such as "dev" do not leak into the kernel's global symbol namespace.
** Static storage is zero-initialised, so no explicit "= 0" is needed.
*/
/* Number of read events handled by the wait thread so far */
static uint32_t read_count;
/* Kernel thread that sleeps on wait_queue_test */
static struct task_struct *wait_thread;
/* Wait queue head, declared and initialised statically */
static DECLARE_WAIT_QUEUE_HEAD(wait_queue_test);
/* Device number filled in by alloc_chrdev_region() */
static dev_t dev;
static struct class *dev_class;
static struct cdev wq_cdev;
/* Wake-up condition: 0 = no event, 1 = a read happened, 2 = module is exiting */
static int wait_queue_flag;
/*
** Function Prototypes
*/
static int __init wq_driver_init(void);
static void __exit wq_driver_exit(void);
/*************** Driver functions **********************/
/* Forward declarations are needed because fops refers to these before they are defined. */
static int wq_open(struct inode *inode, struct file *file);
static int wq_release(struct inode *inode, struct file *file);
static ssize_t wq_read(struct file *filp, char __user *buf, size_t len, loff_t *off);
/* buf carries __user here as well, so the prototype matches the definition. */
static ssize_t wq_write(struct file *filp, const char __user *buf, size_t len, loff_t *off);
/*
** File operation sturcture
*/
static struct file_operations fops =
{
.owner = THIS_MODULE,
.read = wq_read,
.write = wq_write,
.open = wq_open,
.release = wq_release,
};
/*
** Thread function
**
** Sleeps on wait_queue_test until wait_queue_flag becomes non-zero or
** kthread_stop() is called on this thread, then handles the event:
**   flag == 1: a read happened; count it, log it and go back to sleep.
**   flag == 2: the module is exiting; log it and return.
**
** @unused: thread argument, not used.
** Returns 0 in all cases.
*/
static int wait_function(void *unused)
{
	while (!kthread_should_stop()) {
		pr_info("Waiting For Event...\n");
		wait_event_interruptible(wait_queue_test,
					 wait_queue_flag != 0 || kthread_should_stop());

		if (wait_queue_flag == 2) {
			pr_info("Event Came From Exit Function\n");
			return 0;
		}

		/*
		 * Consume a read event by switching the flag 1 -> 0 only.
		 * A plain "wait_queue_flag = 0" could overwrite an exit
		 * request (2) that arrived after the check above and leave
		 * this thread asleep forever.
		 */
		if (cmpxchg(&wait_queue_flag, 1, 0) == 1)
			pr_info("Event Came From Read Function - %d\n", ++read_count);
	}

	/* Returning from the thread function ends the kthread; no do_exit() needed. */
	return 0;
}
/*
** open() handler for the device file.
** Only logs the call; always succeeds with 0.
*/
static int wq_open(struct inode *inode, struct file *file)
{
	pr_info("Device File Opened...!!!\n");

	return 0;
}
/*
** release() handler, called when the device file is closed.
** Only logs the call; always succeeds with 0.
*/
static int wq_release(struct inode *inode, struct file *file)
{
	pr_info("Device File Closed...!!!\n");

	return 0;
}
/*
** read() handler for the device file.
** Raises the "read happened" event for the wait thread. No data is copied
** to @buf and the return value is always 0.
*/
static ssize_t wq_read(struct file *filp, char __user *buf, size_t len, loff_t *off)
{
	pr_info("Read Function\n");

	/* Publish the event first, then wake the sleeper so it re-checks the flag. */
	wait_queue_flag = 1;
	wake_up_interruptible(&wait_queue_test);

	return 0;
}
/*
** write() handler for the device file.
** Logs the call and reports all @len bytes as written; @buf is not read.
*/
static ssize_t wq_write(struct file *filp, const char __user *buf, size_t len, loff_t *off)
{
	pr_info("Write function\n");

	return len;
}
/*
** Module Init function
**
** Allocates a char device region, registers the cdev, creates the class
** and the /dev/wq_device node, and finally starts the WaitThread kernel
** thread. Every failure undoes the steps completed so far, in reverse
** order.
**
** Returns 0 on success or the negative errno of the step that failed.
*/
static int __init wq_driver_init(void)
{
	struct device *wq_device;
	int ret;

	/* Allocating Major number */
	ret = alloc_chrdev_region(&dev, 0, 1, "wq_Dev");
	if (ret < 0) {
		pr_err("Cannot allocate major number\n");
		return ret;
	}
	pr_info("Major = %d Minor = %d \n", MAJOR(dev), MINOR(dev));

	/* Creating cdev structure; cdev_init() also stores &fops in wq_cdev.ops */
	cdev_init(&wq_cdev, &fops);
	wq_cdev.owner = THIS_MODULE;

	/* Adding character device to the system */
	ret = cdev_add(&wq_cdev, dev, 1);
	if (ret < 0) {
		pr_err("Cannot add the device to the system\n");
		goto r_region;
	}

	/* Creating struct class; failure is reported as ERR_PTR(), never NULL */
	dev_class = class_create(THIS_MODULE, "wq_class");
	if (IS_ERR(dev_class)) {
		pr_err("Cannot create the struct class\n");
		ret = PTR_ERR(dev_class);
		goto r_cdev;
	}

	/* Creating device; failure is reported as ERR_PTR() as well */
	wq_device = device_create(dev_class, NULL, dev, NULL, "wq_device");
	if (IS_ERR(wq_device)) {
		pr_err("Cannot create the Device 1\n");
		ret = PTR_ERR(wq_device);
		goto r_class;
	}

	/*
	 * Create and start the kernel thread named 'WaitThread'.
	 * kthread_run() is kthread_create() plus wake_up_process() and
	 * returns ERR_PTR() on failure, so a NULL test would never fire.
	 */
	wait_thread = kthread_run(wait_function, NULL, "WaitThread");
	if (IS_ERR(wait_thread)) {
		pr_err("Thread creation failed\n");
		ret = PTR_ERR(wait_thread);
		wait_thread = NULL;
		goto r_device;
	}
	pr_info("Thread Created successfully\n");

	pr_info("Device Driver Insert...Done!!!\n");
	return 0;

r_device:
	device_destroy(dev_class, dev);
r_class:
	class_destroy(dev_class);
r_cdev:
	cdev_del(&wq_cdev);
r_region:
	unregister_chrdev_region(dev, 1);
	return ret;
}
/*
** Module exit function
**
** Tells the wait thread to exit (flag = 2 plus a wake-up), waits until it
** has really finished, and only then tears down the device, class, cdev
** and device-number region.
*/
static void __exit wq_driver_exit(void)
{
	if (!IS_ERR_OR_NULL(wait_thread)) {
		/*
		 * Pin the task_struct first: the thread returns on its own
		 * once it sees flag == 2, and kthread_stop() must not be
		 * handed a task that has already been freed.
		 */
		get_task_struct(wait_thread);

		wait_queue_flag = 2;
		wake_up_interruptible(&wait_queue_test);

		/*
		 * Block until the thread function has returned. Without
		 * this the module text could be unloaded while the thread
		 * is still executing it.
		 */
		kthread_stop(wait_thread);
		put_task_struct(wait_thread);
	}

	device_destroy(dev_class, dev);
	class_destroy(dev_class);
	cdev_del(&wq_cdev);
	unregister_chrdev_region(dev, 1);
	pr_info("Device Driver Remove...Done!!!\n");
}
/* Register the load/unload entry points and the module metadata. */
module_init(wq_driver_init);
module_exit(wq_driver_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Simple linux driver (Waitqueue Static method)");
MODULE_VERSION("1.0");
Makefile
# Out-of-tree kbuild Makefile for the static wait queue example.
obj-m += static_waitqueue.o

# Kernel build tree; override on the command line to build for another kernel.
KDIR ?= /lib/modules/$(shell uname -r)/build

.PHONY: all clean

# Recipe lines must start with a TAB; $(MAKE) passes flags/jobserver to the sub-make.
all:
	$(MAKE) -C $(KDIR) M=$(CURDIR) modules

clean:
	$(MAKE) -C $(KDIR) M=$(CURDIR) clean
dynamic初始化方式
#include <linux/atomic.h> //cmpxchg()
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/err.h> //IS_ERR(), PTR_ERR()
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/kdev_t.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/sched/task.h> //get_task_struct(), put_task_struct()
#include <linux/slab.h> //kmalloc()
#include <linux/uaccess.h> //copy_to/from_user()
#include <linux/wait.h> // Required for the wait queues
/*
** Module state. Everything is file-local (static) so that generic names
** such as "dev" do not leak into the kernel's global symbol namespace.
** Static storage is zero-initialised, so no explicit "= 0" is needed.
*/
/* Number of read events handled by the wait thread so far */
static uint32_t read_count;
/* Kernel thread that sleeps on wait_queue_test */
static struct task_struct *wait_thread;
/* Device number filled in by alloc_chrdev_region() */
static dev_t dev;
static struct class *dev_class;
static struct cdev wq_cdev;
/* Wait queue head; must be set up with init_waitqueue_head() before first use */
static wait_queue_head_t wait_queue_test;
/* Wake-up condition: 0 = no event, 1 = a read happened, 2 = module is exiting */
static int wait_queue_flag;
/*
** Function Prototypes
*/
static int __init wq_driver_init(void);
static void __exit wq_driver_exit(void);
/*************** Driver functions **********************/
/* Forward declarations are needed because fops refers to these before they are defined. */
static int wq_open(struct inode *inode, struct file *file);
static int wq_release(struct inode *inode, struct file *file);
static ssize_t wq_read(struct file *filp, char __user *buf, size_t len, loff_t *off);
/* buf carries __user here as well, so the prototype matches the definition. */
static ssize_t wq_write(struct file *filp, const char __user *buf, size_t len, loff_t *off);
/*
** File operation sturcture
*/
static struct file_operations fops =
{
.owner = THIS_MODULE,
.read = wq_read,
.write = wq_write,
.open = wq_open,
.release = wq_release,
};
/*
** Thread function
**
** Sleeps on wait_queue_test until wait_queue_flag becomes non-zero or
** kthread_stop() is called on this thread, then handles the event:
**   flag == 1: a read happened; count it, log it and go back to sleep.
**   flag == 2: the module is exiting; log it and return.
**
** @unused: thread argument, not used.
** Returns 0 in all cases.
*/
static int wait_function(void *unused)
{
	while (!kthread_should_stop()) {
		pr_info("Waiting For Event...\n");
		wait_event_interruptible(wait_queue_test,
					 wait_queue_flag != 0 || kthread_should_stop());

		if (wait_queue_flag == 2) {
			pr_info("Event Came From Exit Function\n");
			return 0;
		}

		/*
		 * Consume a read event by switching the flag 1 -> 0 only.
		 * A plain "wait_queue_flag = 0" could overwrite an exit
		 * request (2) that arrived after the check above and leave
		 * this thread asleep forever.
		 */
		if (cmpxchg(&wait_queue_flag, 1, 0) == 1)
			pr_info("Event Came From Read Function - %d\n", ++read_count);
	}

	return 0;
}
/*
** open() handler for the device file.
** Only logs the call; always succeeds with 0.
*/
static int wq_open(struct inode *inode, struct file *file)
{
	pr_info("Device File Opened...!!!\n");

	return 0;
}
/*
** release() handler, called when the device file is closed.
** Only logs the call; always succeeds with 0.
*/
static int wq_release(struct inode *inode, struct file *file)
{
	pr_info("Device File Closed...!!!\n");

	return 0;
}
/*
** read() handler for the device file.
** Raises the "read happened" event for the wait thread. No data is copied
** to @buf and the return value is always 0.
*/
static ssize_t wq_read(struct file *filp, char __user *buf, size_t len, loff_t *off)
{
	pr_info("Read Function\n");

	/* Publish the event first, then wake the sleeper so it re-checks the flag. */
	wait_queue_flag = 1;
	wake_up_interruptible(&wait_queue_test);

	return 0;
}
/*
** write() handler for the device file.
** Logs the call and reports all @len bytes as written; @buf is not read.
*/
static ssize_t wq_write(struct file *filp, const char __user *buf, size_t len, loff_t *off)
{
	pr_info("Write function\n");

	return len;
}
/*
** Module Init function
**
** Initialises the wait queue head, allocates a char device region,
** registers the cdev, creates the class and the /dev/wq_device node, and
** finally starts the WaitThread kernel thread. Every failure undoes the
** steps completed so far, in reverse order.
**
** Returns 0 on success or the negative errno of the step that failed.
*/
static int __init wq_driver_init(void)
{
	struct device *wq_device;
	int ret;

	/*
	 * Initialize wait queue before the device can be opened: wq_read()
	 * calls wake_up_interruptible() on it as soon as the cdev is live.
	 */
	init_waitqueue_head(&wait_queue_test);

	/* Allocating Major number */
	ret = alloc_chrdev_region(&dev, 0, 1, "wq_Dev");
	if (ret < 0) {
		pr_err("Cannot allocate major number\n");
		return ret;
	}
	pr_info("Major = %d Minor = %d \n", MAJOR(dev), MINOR(dev));

	/* Creating cdev structure */
	cdev_init(&wq_cdev, &fops);
	wq_cdev.owner = THIS_MODULE;

	/* Adding character device to the system */
	ret = cdev_add(&wq_cdev, dev, 1);
	if (ret < 0) {
		pr_err("Cannot add the device to the system\n");
		goto r_region;
	}

	/* Creating struct class; failure is reported as ERR_PTR(), never NULL */
	dev_class = class_create(THIS_MODULE, "wq_class");
	if (IS_ERR(dev_class)) {
		pr_err("Cannot create the struct class\n");
		ret = PTR_ERR(dev_class);
		goto r_cdev;
	}

	/* Creating device; failure is reported as ERR_PTR() as well */
	wq_device = device_create(dev_class, NULL, dev, NULL, "wq_device");
	if (IS_ERR(wq_device)) {
		pr_err("Cannot create the Device 1\n");
		ret = PTR_ERR(wq_device);
		goto r_class;
	}

	/*
	 * Create and start the kernel thread named 'WaitThread'.
	 * kthread_run() is kthread_create() plus wake_up_process() and
	 * returns ERR_PTR() on failure, so a NULL test would never fire.
	 */
	wait_thread = kthread_run(wait_function, NULL, "WaitThread");
	if (IS_ERR(wait_thread)) {
		pr_err("Thread creation failed\n");
		ret = PTR_ERR(wait_thread);
		wait_thread = NULL;
		goto r_device;
	}
	pr_info("Thread Created successfully\n");

	pr_info("Device Driver Insert...Done!!!\n");
	return 0;

r_device:
	device_destroy(dev_class, dev);
r_class:
	class_destroy(dev_class);
r_cdev:
	cdev_del(&wq_cdev);
r_region:
	unregister_chrdev_region(dev, 1);
	return ret;
}
/*
** Module exit function
**
** Tells the wait thread to exit (flag = 2 plus a wake-up), waits until it
** has really finished, and only then tears down the device, class, cdev
** and device-number region.
*/
static void __exit wq_driver_exit(void)
{
	if (!IS_ERR_OR_NULL(wait_thread)) {
		/*
		 * Pin the task_struct first: the thread returns on its own
		 * once it sees flag == 2, and kthread_stop() must not be
		 * handed a task that has already been freed.
		 */
		get_task_struct(wait_thread);

		wait_queue_flag = 2;
		wake_up_interruptible(&wait_queue_test);

		/*
		 * Block until the thread function has returned. Without
		 * this the module text could be unloaded while the thread
		 * is still executing it.
		 */
		kthread_stop(wait_thread);
		put_task_struct(wait_thread);
	}

	device_destroy(dev_class, dev);
	class_destroy(dev_class);
	cdev_del(&wq_cdev);
	unregister_chrdev_region(dev, 1);
	pr_info("Device Driver Remove...Done!!!\n");
}
/* Register the load/unload entry points and the module metadata. */
module_init(wq_driver_init);
module_exit(wq_driver_exit);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Simple linux driver (Waitqueue Dynamic method)");
MODULE_VERSION("1.0");
运行
可以看到创建了内核线程WaitThread,每当对/dev/wq_device进行read操作,WaitThread都会被唤醒进行记录read次数并打印。
root@pc:static# insmod static_waitqueue.ko
root@pc:static# dmesg
[73171.445834] Major = 237 Minor = 0
[73171.446330] Thread Created successfully
[73171.446340] Device Driver Insert...Done!!!
[73171.446419] Waiting For Event...
root@pc:static# echo 222 >/dev/wq_device
root@pc:static# cat /dev/wq_device
root@pc:static# dmesg
[73171.445834] Major = 237 Minor = 0
[73171.446330] Thread Created successfully
[73171.446340] Device Driver Insert...Done!!!
[73171.446419] Waiting For Event...
[73184.720942] Device File Opened...!!!
[73184.720978] Write function
[73184.720986] Device File Closed...!!!
[73193.355795] Device File Opened...!!!
[73193.355823] Read Function
[73193.355868] Device File Closed...!!!
[73193.355913] Event Came From Read Function - 1
[73193.355916] Waiting For Event...
root@pc:static# ps -f -C WaitThread
UID PID PPID C STIME TTY TIME CMD
root 58059 2 0 14:17 ? 00:00:00 [WaitThread]
reference
Linux Device Driver Tutorials - ch10