// // epoll_reactor.hpp // ~~~~~~~~~~~~~~~~~ // // Copyright (c) 2003-2008 Christopher M. Kohlhoff (chris at kohlhoff dot com) // // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // #ifndef ASIO_DETAIL_EPOLL_REACTOR_HPP #define ASIO_DETAIL_EPOLL_REACTOR_HPP #if defined(_MSC_VER) && (_MSC_VER >= 1200) # pragma once #endif // defined(_MSC_VER) && (_MSC_VER >= 1200) #include "asio/detail/push_options.hpp" #include "asio/detail/epoll_reactor_fwd.hpp" #if defined(ASIO_HAS_EPOLL) #include "asio/detail/push_options.hpp" #include #include #include #include #include #include #include "asio/detail/pop_options.hpp" #include "asio/error.hpp" #include "asio/io_service.hpp" #include "asio/system_error.hpp" #include "asio/detail/bind_handler.hpp" #include "asio/detail/hash_map.hpp" #include "asio/detail/mutex.hpp" #include "asio/detail/task_io_service.hpp" #include "asio/detail/thread.hpp" #include "asio/detail/reactor_op_queue.hpp" #include "asio/detail/select_interrupter.hpp" #include "asio/detail/service_base.hpp" #include "asio/detail/signal_blocker.hpp" #include "asio/detail/socket_types.hpp" #include "asio/detail/timer_queue.hpp" namespace asio { namespace detail { template class epoll_reactor : public asio::detail::service_base > { public: // Per-descriptor data. struct per_descriptor_data { bool allow_speculative_read; bool allow_speculative_write; }; // Constructor. epoll_reactor(asio::io_service& io_service) : asio::detail::service_base >(io_service), mutex_(), epoll_fd_(do_epoll_create()), wait_in_progress_(false), interrupter_(), read_op_queue_(), write_op_queue_(), except_op_queue_(), pending_cancellations_(), stop_thread_(false), thread_(0), shutdown_(false), need_epoll_wait_(true) { // Start the reactor's internal thread only if needed. if (Own_Thread) { asio::detail::signal_blocker sb; thread_ = new asio::detail::thread( bind_handler(&epoll_reactor::call_run_thread, this)); } // Add the interrupter's descriptor to epoll. epoll_event ev = { 0, { 0 } }; ev.events = EPOLLIN | EPOLLERR; ev.data.fd = interrupter_.read_descriptor(); epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, interrupter_.read_descriptor(), &ev); } // Destructor. ~epoll_reactor() { shutdown_service(); close(epoll_fd_); } // Destroy all user-defined handler objects owned by the service. void shutdown_service() { asio::detail::mutex::scoped_lock lock(mutex_); shutdown_ = true; stop_thread_ = true; lock.unlock(); if (thread_) { interrupter_.interrupt(); thread_->join(); delete thread_; thread_ = 0; } read_op_queue_.destroy_operations(); write_op_queue_.destroy_operations(); except_op_queue_.destroy_operations(); for (std::size_t i = 0; i < timer_queues_.size(); ++i) timer_queues_[i]->destroy_timers(); timer_queues_.clear(); } // Register a socket with the reactor. Returns 0 on success, system error // code on failure. int register_descriptor(socket_type descriptor, per_descriptor_data& descriptor_data) { // No need to lock according to epoll documentation. descriptor_data.allow_speculative_read = true; descriptor_data.allow_speculative_write = true; epoll_event ev = { 0, { 0 } }; ev.events = 0; ev.data.fd = descriptor; int result = epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, descriptor, &ev); if (result != 0) return errno; return 0; } // Start a new read operation. The handler object will be invoked when the // given descriptor is ready to be read, or an error has occurred. template void start_read_op(socket_type descriptor, per_descriptor_data& descriptor_data, Handler handler, bool allow_speculative_read = true) { if (allow_speculative_read && descriptor_data.allow_speculative_read) { asio::error_code ec; std::size_t bytes_transferred = 0; if (handler.perform(ec, bytes_transferred)) { handler.complete(ec, bytes_transferred); return; } // We only get one shot at a speculative read in this function. allow_speculative_read = false; } asio::detail::mutex::scoped_lock lock(mutex_); if (shutdown_) return; if (!allow_speculative_read) need_epoll_wait_ = true; else if (!read_op_queue_.has_operation(descriptor)) { // Speculative reads are ok as there are no queued read operations. descriptor_data.allow_speculative_read = true; asio::error_code ec; std::size_t bytes_transferred = 0; if (handler.perform(ec, bytes_transferred)) { handler.complete(ec, bytes_transferred); return; } } // Speculative reads are not ok as there will be queued read operations. descriptor_data.allow_speculative_read = false; if (read_op_queue_.enqueue_operation(descriptor, handler)) { epoll_event ev = { 0, { 0 } }; ev.events = EPOLLIN | EPOLLERR | EPOLLHUP; if (write_op_queue_.has_operation(descriptor)) ev.events |= EPOLLOUT; if (except_op_queue_.has_operation(descriptor)) ev.events |= EPOLLPRI; ev.data.fd = descriptor; int result = epoll_ctl(epoll_fd_, EPOLL_CTL_MOD, descriptor, &ev); if (result != 0 && errno == ENOENT) result = epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, descriptor, &ev); if (result != 0) { asio::error_code ec(errno, asio::error::get_system_category()); read_op_queue_.perform_all_operations(descriptor, ec); } } } // Start a new write operation. The handler object will be invoked when the // given descriptor is ready to be written, or an error has occurred. template void start_write_op(socket_type descriptor, per_descriptor_data& descriptor_data, Handler handler, bool allow_speculative_write = true) { if (allow_speculative_write && descriptor_data.allow_speculative_write) { asio::error_code ec; std::size_t bytes_transferred = 0; if (handler.perform(ec, bytes_transferred)) { handler.complete(ec, bytes_transferred); return; } // We only get one shot at a speculative write in this function. allow_speculative_write = false; } asio::detail::mutex::scoped_lock lock(mutex_); if (shutdown_) return; if (!allow_speculative_write) need_epoll_wait_ = true; else if (!write_op_queue_.has_operation(descriptor)) { // Speculative writes are ok as there are no queued write operations. descriptor_data.allow_speculative_write = true; asio::error_code ec; std::size_t bytes_transferred = 0; if (handler.perform(ec, bytes_transferred)) { handler.complete(ec, bytes_transferred); return; } } // Speculative writes are not ok as there will be queued write operations. descriptor_data.allow_speculative_write = false; if (write_op_queue_.enqueue_operation(descriptor, handler)) { epoll_event ev = { 0, { 0 } }; ev.events = EPOLLOUT | EPOLLERR | EPOLLHUP; if (read_op_queue_.has_operation(descriptor)) ev.events |= EPOLLIN; if (except_op_queue_.has_operation(descriptor)) ev.events |= EPOLLPRI; ev.data.fd = descriptor; int result = epoll_ctl(epoll_fd_, EPOLL_CTL_MOD, descriptor, &ev); if (result != 0 && errno == ENOENT) result = epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, descriptor, &ev); if (result != 0) { asio::error_code ec(errno, asio::error::get_system_category()); write_op_queue_.perform_all_operations(descriptor, ec); } } } // Start a new exception operation. The handler object will be invoked when // the given descriptor has exception information, or an error has occurred. template void start_except_op(socket_type descriptor, per_descriptor_data&, Handler handler) { asio::detail::mutex::scoped_lock lock(mutex_); if (shutdown_) return; if (except_op_queue_.enqueue_operation(descriptor, handler)) { epoll_event ev = { 0, { 0 } }; ev.events = EPOLLPRI | EPOLLERR | EPOLLHUP; if (read_op_queue_.has_operation(descriptor)) ev.events |= EPOLLIN; if (write_op_queue_.has_operation(descriptor)) ev.events |= EPOLLOUT; ev.data.fd = descriptor; int result = epoll_ctl(epoll_fd_, EPOLL_CTL_MOD, descriptor, &ev); if (result != 0 && errno == ENOENT) result = epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, descriptor, &ev); if (result != 0) { asio::error_code ec(errno, asio::error::get_system_category()); except_op_queue_.perform_all_operations(descriptor, ec); } } } // Start a new write operation. The handler object will be invoked when the // given descriptor is ready for writing or an error has occurred. Speculative // writes are not allowed. template void start_connect_op(socket_type descriptor, per_descriptor_data& descriptor_data, Handler handler) { asio::detail::mutex::scoped_lock lock(mutex_); if (shutdown_) return; // Speculative writes are not ok as there will be queued write operations. descriptor_data.allow_speculative_write = false; if (write_op_queue_.enqueue_operation(descriptor, handler)) { epoll_event ev = { 0, { 0 } }; ev.events = EPOLLOUT | EPOLLERR | EPOLLHUP; if (read_op_queue_.has_operation(descriptor)) ev.events |= EPOLLIN; if (except_op_queue_.has_operation(descriptor)) ev.events |= EPOLLPRI; ev.data.fd = descriptor; int result = epoll_ctl(epoll_fd_, EPOLL_CTL_MOD, descriptor, &ev); if (result != 0 && errno == ENOENT) result = epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, descriptor, &ev); if (result != 0) { asio::error_code ec(errno, asio::error::get_system_category()); write_op_queue_.perform_all_operations(descriptor, ec); } } } // Cancel all operations associated with the given descriptor. The // handlers associated with the descriptor will be invoked with the // operation_aborted error. void cancel_ops(socket_type descriptor, per_descriptor_data&) { asio::detail::mutex::scoped_lock lock(mutex_); cancel_ops_unlocked(descriptor); } // Cancel any operations that are running against the descriptor and remove // its registration from the reactor. void close_descriptor(socket_type descriptor, per_descriptor_data&) { asio::detail::mutex::scoped_lock lock(mutex_); // Remove the descriptor from epoll. epoll_event ev = { 0, { 0 } }; epoll_ctl(epoll_fd_, EPOLL_CTL_DEL, descriptor, &ev); // Cancel any outstanding operations associated with the descriptor. cancel_ops_unlocked(descriptor); } // Add a new timer queue to the reactor. template void add_timer_queue(timer_queue& timer_queue) { asio::detail::mutex::scoped_lock lock(mutex_); timer_queues_.push_back(&timer_queue); } // Remove a timer queue from the reactor. template void remove_timer_queue(timer_queue& timer_queue) { asio::detail::mutex::scoped_lock lock(mutex_); for (std::size_t i = 0; i < timer_queues_.size(); ++i) { if (timer_queues_[i] == &timer_queue) { timer_queues_.erase(timer_queues_.begin() + i); return; } } } // Schedule a timer in the given timer queue to expire at the specified // absolute time. The handler object will be invoked when the timer expires. template void schedule_timer(timer_queue& timer_queue, const typename Time_Traits::time_type& time, Handler handler, void* token) { asio::detail::mutex::scoped_lock lock(mutex_); if (!shutdown_) if (timer_queue.enqueue_timer(time, handler, token)) interrupter_.interrupt(); } // Cancel the timer associated with the given token. Returns the number of // handlers that have been posted or dispatched. template std::size_t cancel_timer(timer_queue& timer_queue, void* token) { asio::detail::mutex::scoped_lock lock(mutex_); std::size_t n = timer_queue.cancel_timer(token); if (n > 0) interrupter_.interrupt(); return n; } private: friend class task_io_service >; // Run epoll once until interrupted or events are ready to be dispatched. void run(bool block) { asio::detail::mutex::scoped_lock lock(mutex_); // Dispatch any operation cancellations that were made while the select // loop was not running. read_op_queue_.perform_cancellations(); write_op_queue_.perform_cancellations(); except_op_queue_.perform_cancellations(); for (std::size_t i = 0; i < timer_queues_.size(); ++i) timer_queues_[i]->dispatch_cancellations(); // Check if the thread is supposed to stop. if (stop_thread_) { complete_operations_and_timers(lock); return; } // We can return immediately if there's no work to do and the reactor is // not supposed to block. if (!block && read_op_queue_.empty() && write_op_queue_.empty() && except_op_queue_.empty() && all_timer_queues_are_empty()) { complete_operations_and_timers(lock); return; } int timeout = block ? get_timeout() : 0; wait_in_progress_ = true; lock.unlock(); // Block on the epoll descriptor. epoll_event events[128]; int num_events = (block || need_epoll_wait_) ? epoll_wait(epoll_fd_, events, 128, timeout) : 0; lock.lock(); wait_in_progress_ = false; // Block signals while performing operations. asio::detail::signal_blocker sb; // Dispatch the waiting events. for (int i = 0; i < num_events; ++i) { int descriptor = events[i].data.fd; if (descriptor == interrupter_.read_descriptor()) { interrupter_.reset(); } else { bool more_reads = false; bool more_writes = false; bool more_except = false; asio::error_code ec; // Exception operations must be processed first to ensure that any // out-of-band data is read before normal data. if (events[i].events & (EPOLLPRI | EPOLLERR | EPOLLHUP)) more_except = except_op_queue_.perform_operation(descriptor, ec); else more_except = except_op_queue_.has_operation(descriptor); if (events[i].events & (EPOLLIN | EPOLLERR | EPOLLHUP)) more_reads = read_op_queue_.perform_operation(descriptor, ec); else more_reads = read_op_queue_.has_operation(descriptor); if (events[i].events & (EPOLLOUT | EPOLLERR | EPOLLHUP)) more_writes = write_op_queue_.perform_operation(descriptor, ec); else more_writes = write_op_queue_.has_operation(descriptor); if ((events[i].events & (EPOLLERR | EPOLLHUP)) != 0 && (events[i].events & ~(EPOLLERR | EPOLLHUP)) == 0 && !more_except && !more_reads && !more_writes) { // If we have an event and no operations associated with the // descriptor then we need to delete the descriptor from epoll. The // epoll_wait system call can produce EPOLLHUP or EPOLLERR events // when there is no operation pending, so if we do not remove the // descriptor we can end up in a tight loop of repeated // calls to epoll_wait. epoll_event ev = { 0, { 0 } }; epoll_ctl(epoll_fd_, EPOLL_CTL_DEL, descriptor, &ev); } else { epoll_event ev = { 0, { 0 } }; ev.events = EPOLLERR | EPOLLHUP; if (more_reads) ev.events |= EPOLLIN; if (more_writes) ev.events |= EPOLLOUT; if (more_except) ev.events |= EPOLLPRI; ev.data.fd = descriptor; int result = epoll_ctl(epoll_fd_, EPOLL_CTL_MOD, descriptor, &ev); if (result != 0 && errno == ENOENT) result = epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, descriptor, &ev); if (result != 0) { ec = asio::error_code(errno, asio::error::get_system_category()); read_op_queue_.perform_all_operations(descriptor, ec); write_op_queue_.perform_all_operations(descriptor, ec); except_op_queue_.perform_all_operations(descriptor, ec); } } } } read_op_queue_.perform_cancellations(); write_op_queue_.perform_cancellations(); except_op_queue_.perform_cancellations(); for (std::size_t i = 0; i < timer_queues_.size(); ++i) { timer_queues_[i]->dispatch_timers(); timer_queues_[i]->dispatch_cancellations(); } // Issue any pending cancellations. for (size_t i = 0; i < pending_cancellations_.size(); ++i) cancel_ops_unlocked(pending_cancellations_[i]); pending_cancellations_.clear(); // Determine whether epoll_wait should be called when the reactor next runs. need_epoll_wait_ = !read_op_queue_.empty() || !write_op_queue_.empty() || !except_op_queue_.empty(); complete_operations_and_timers(lock); } // Run the select loop in the thread. void run_thread() { asio::detail::mutex::scoped_lock lock(mutex_); while (!stop_thread_) { lock.unlock(); run(true); lock.lock(); } } // Entry point for the select loop thread. static void call_run_thread(epoll_reactor* reactor) { reactor->run_thread(); } // Interrupt the select loop. void interrupt() { interrupter_.interrupt(); } // The hint to pass to epoll_create to size its data structures. enum { epoll_size = 20000 }; // Create the epoll file descriptor. Throws an exception if the descriptor // cannot be created. static int do_epoll_create() { int fd = epoll_create(epoll_size); if (fd == -1) { boost::throw_exception( asio::system_error( asio::error_code(errno, asio::error::get_system_category()), "epoll")); } return fd; } // Check if all timer queues are empty. bool all_timer_queues_are_empty() const { for (std::size_t i = 0; i < timer_queues_.size(); ++i) if (!timer_queues_[i]->empty()) return false; return true; } // Get the timeout value for the epoll_wait call. The timeout value is // returned as a number of milliseconds. A return value of -1 indicates // that epoll_wait should block indefinitely. int get_timeout() { if (all_timer_queues_are_empty()) return -1; // By default we will wait no longer than 5 minutes. This will ensure that // any changes to the system clock are detected after no longer than this. boost::posix_time::time_duration minimum_wait_duration = boost::posix_time::minutes(5); for (std::size_t i = 0; i < timer_queues_.size(); ++i) { boost::posix_time::time_duration wait_duration = timer_queues_[i]->wait_duration(); if (wait_duration < minimum_wait_duration) minimum_wait_duration = wait_duration; } if (minimum_wait_duration > boost::posix_time::time_duration()) { int milliseconds = minimum_wait_duration.total_milliseconds(); return milliseconds > 0 ? milliseconds : 1; } else { return 0; } } // Cancel all operations associated with the given descriptor. The do_cancel // function of the handler objects will be invoked. This function does not // acquire the epoll_reactor's mutex. void cancel_ops_unlocked(socket_type descriptor) { bool interrupt = read_op_queue_.cancel_operations(descriptor); interrupt = write_op_queue_.cancel_operations(descriptor) || interrupt; interrupt = except_op_queue_.cancel_operations(descriptor) || interrupt; if (interrupt) interrupter_.interrupt(); } // Clean up operations and timers. We must not hold the lock since the // destructors may make calls back into this reactor. We make a copy of the // vector of timer queues since the original may be modified while the lock // is not held. void complete_operations_and_timers( asio::detail::mutex::scoped_lock& lock) { timer_queues_for_cleanup_ = timer_queues_; lock.unlock(); read_op_queue_.complete_operations(); write_op_queue_.complete_operations(); except_op_queue_.complete_operations(); for (std::size_t i = 0; i < timer_queues_for_cleanup_.size(); ++i) timer_queues_for_cleanup_[i]->complete_timers(); } // Mutex to protect access to internal data. asio::detail::mutex mutex_; // The epoll file descriptor. int epoll_fd_; // Whether the epoll_wait call is currently in progress bool wait_in_progress_; // The interrupter is used to break a blocking epoll_wait call. select_interrupter interrupter_; // The queue of read operations. reactor_op_queue read_op_queue_; // The queue of write operations. reactor_op_queue write_op_queue_; // The queue of except operations. reactor_op_queue except_op_queue_; // The timer queues. std::vector timer_queues_; // A copy of the timer queues, used when cleaning up timers. The copy is // stored as a class data member to avoid unnecessary memory allocation. std::vector timer_queues_for_cleanup_; // The descriptors that are pending cancellation. std::vector pending_cancellations_; // Does the reactor loop thread need to stop. bool stop_thread_; // The thread that is running the reactor loop. asio::detail::thread* thread_; // Whether the service has been shut down. bool shutdown_; // Whether we need to call epoll_wait the next time the reactor is run. bool need_epoll_wait_; }; } // namespace detail } // namespace asio #endif // defined(ASIO_HAS_EPOLL) #include "asio/detail/pop_options.hpp" #endif // ASIO_DETAIL_EPOLL_REACTOR_HPP