taskqueue.cpp revision 1472
1008N/A/*
1008N/A * Copyright (c) 2001, 2009, Oracle and/or its affiliates. All rights reserved.
1008N/A * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
1008N/A *
1008N/A * This code is free software; you can redistribute it and/or modify it
1008N/A * under the terms of the GNU General Public License version 2 only, as
1008N/A * published by the Free Software Foundation.
1008N/A *
1008N/A * This code is distributed in the hope that it will be useful, but WITHOUT
1008N/A * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1008N/A * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
1008N/A * version 2 for more details (a copy is included in the LICENSE file that
1008N/A * accompanied this code).
1008N/A *
1008N/A * You should have received a copy of the GNU General Public License version
1008N/A * 2 along with this work; if not, write to the Free Software Foundation,
1008N/A * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
1008N/A *
1008N/A * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
1008N/A * or visit www.oracle.com if you need additional information or have any
1008N/A * questions.
1008N/A *
1008N/A */
1008N/A
1008N/A# include "incls/_precompiled.incl"
1008N/A# include "incls/_taskqueue.cpp.incl"
1008N/A
1008N/A#ifdef TRACESPINNING
1008N/Auint ParallelTaskTerminator::_total_yields = 0;
1008N/Auint ParallelTaskTerminator::_total_spins = 0;
1008N/Auint ParallelTaskTerminator::_total_peeks = 0;
1008N/A#endif
1008N/A
1008N/Aint TaskQueueSetSuper::randomParkAndMiller(int *seed0) {
1008N/A const int a = 16807;
1008N/A const int m = 2147483647;
1008N/A const int q = 127773; /* m div a */
1008N/A const int r = 2836; /* m mod a */
1008N/A assert(sizeof(int) == 4, "I think this relies on that");
1008N/A int seed = *seed0;
1008N/A int hi = seed / q;
1008N/A int lo = seed % q;
1008N/A int test = a * lo - r * hi;
1008N/A if (test > 0)
1008N/A seed = test;
1008N/A else
1008N/A seed = test + m;
1008N/A *seed0 = seed;
1008N/A return seed;
1008N/A}
1008N/A
1008N/AParallelTaskTerminator::
1008N/AParallelTaskTerminator(int n_threads, TaskQueueSetSuper* queue_set) :
1008N/A _n_threads(n_threads),
1008N/A _queue_set(queue_set),
1008N/A _offered_termination(0) {}
1008N/A
1008N/Abool ParallelTaskTerminator::peek_in_queue_set() {
1008N/A return _queue_set->peek();
1008N/A}
1008N/A
1008N/Avoid ParallelTaskTerminator::yield() {
1008N/A assert(_offered_termination <= _n_threads, "Invariant");
1008N/A os::yield();
1008N/A}
1008N/A
1008N/Avoid ParallelTaskTerminator::sleep(uint millis) {
1008N/A assert(_offered_termination <= _n_threads, "Invariant");
1008N/A os::sleep(Thread::current(), millis, false);
1008N/A}
1008N/A
1008N/Abool
1008N/AParallelTaskTerminator::offer_termination(TerminatorTerminator* terminator) {
1008N/A assert(_offered_termination < _n_threads, "Invariant");
1008N/A Atomic::inc(&_offered_termination);
1008N/A
1008N/A uint yield_count = 0;
1548N/A // Number of hard spin loops done since last yield
1548N/A uint hard_spin_count = 0;
1548N/A // Number of iterations in the hard spin loop.
1548N/A uint hard_spin_limit = WorkStealingHardSpins;
1548N/A
1548N/A // If WorkStealingSpinToYieldRatio is 0, no hard spinning is done.
1548N/A // If it is greater than 0, then start with a small number
1548N/A // of spins and increase number with each turn at spinning until
1548N/A // the count of hard spins exceeds WorkStealingSpinToYieldRatio.
1548N/A // Then do a yield() call and start spinning afresh.
1548N/A if (WorkStealingSpinToYieldRatio > 0) {
1548N/A hard_spin_limit = WorkStealingHardSpins >> WorkStealingSpinToYieldRatio;
1548N/A hard_spin_limit = MAX2(hard_spin_limit, 1U);
1548N/A }
1548N/A // Remember the initial spin limit.
1548N/A uint hard_spin_start = hard_spin_limit;
1548N/A
1548N/A // Loop waiting for all threads to offer termination or
1548N/A // more work.
1548N/A while (true) {
1548N/A assert(_offered_termination <= _n_threads, "Invariant");
1548N/A // Are all threads offering termination?
1548N/A if (_offered_termination == _n_threads) {
1548N/A return true;
1548N/A } else {
1008N/A // Look for more work.
1008N/A // Periodically sleep() instead of yield() to give threads
1008N/A // waiting on the cores the chance to grab this code
1008N/A if (yield_count <= WorkStealingYieldsBeforeSleep) {
1008N/A // Do a yield or hardspin. For purposes of deciding whether
1008N/A // to sleep, count this as a yield.
1008N/A yield_count++;
1008N/A
1008N/A // Periodically call yield() instead spinning
1008N/A // After WorkStealingSpinToYieldRatio spins, do a yield() call
1008N/A // and reset the counts and starting limit.
1008N/A if (hard_spin_count > WorkStealingSpinToYieldRatio) {
1008N/A yield();
1008N/A hard_spin_count = 0;
1008N/A hard_spin_limit = hard_spin_start;
1008N/A#ifdef TRACESPINNING
1008N/A _total_yields++;
1008N/A#endif
1008N/A } else {
1008N/A // Hard spin this time
1008N/A // Increase the hard spinning period but only up to a limit.
1008N/A hard_spin_limit = MIN2(2*hard_spin_limit,
1008N/A (uint) WorkStealingHardSpins);
1008N/A for (uint j = 0; j < hard_spin_limit; j++) {
1008N/A SpinPause();
1008N/A }
1008N/A hard_spin_count++;
1008N/A#ifdef TRACESPINNING
1008N/A _total_spins++;
1008N/A#endif
1008N/A }
1008N/A } else {
1008N/A if (PrintGCDetails && Verbose) {
1008N/A gclog_or_tty->print_cr("ParallelTaskTerminator::offer_termination() "
1008N/A "thread %d sleeps after %d yields",
1008N/A Thread::current(), yield_count);
1008N/A }
1008N/A yield_count = 0;
1008N/A // A sleep will cause this processor to seek work on another processor's
1008N/A // runqueue, if it has nothing else to run (as opposed to the yield
1426N/A // which may only move the thread to the end of the this processor's
1426N/A // runqueue).
1426N/A sleep(WorkStealingSleepMillis);
1426N/A }
1426N/A
1426N/A#ifdef TRACESPINNING
1426N/A _total_peeks++;
1426N/A#endif
1426N/A if (peek_in_queue_set() ||
1426N/A (terminator != NULL && terminator->should_exit_termination())) {
1426N/A Atomic::dec(&_offered_termination);
1426N/A assert(_offered_termination < _n_threads, "Invariant");
1426N/A return false;
1426N/A }
1426N/A }
1008N/A }
1008N/A}
1008N/A
1008N/A#ifdef TRACESPINNING
1008N/Avoid ParallelTaskTerminator::print_termination_counts() {
1008N/A gclog_or_tty->print_cr("ParallelTaskTerminator Total yields: %lld "
1008N/A "Total spins: %lld Total peeks: %lld",
1008N/A total_yields(),
1008N/A total_spins(),
1008N/A total_peeks());
1008N/A}
1008N/A#endif
1008N/A
1008N/Avoid ParallelTaskTerminator::reset_for_reuse() {
1008N/A if (_offered_termination != 0) {
1008N/A assert(_offered_termination == _n_threads,
1008N/A "Terminator may still be in use");
1008N/A _offered_termination = 0;
1008N/A }
1008N/A}
1008N/A
1008N/A#ifdef ASSERT
1008N/Abool ObjArrayTask::is_valid() const {
1008N/A return _obj != NULL && _obj->is_objArray() && _index > 0 &&
1008N/A _index < objArrayOop(_obj)->length();
1008N/A}
1008N/A#endif // ASSERT
1008N/A
1008N/Abool RegionTaskQueueWithOverflow::is_empty() {
1008N/A return (_region_queue.size() == 0) &&
1008N/A (_overflow_stack->length() == 0);
1008N/A}
1008N/A
1008N/Abool RegionTaskQueueWithOverflow::stealable_is_empty() {
1008N/A return _region_queue.size() == 0;
1008N/A}
1008N/A
1008N/Abool RegionTaskQueueWithOverflow::overflow_is_empty() {
1008N/A return _overflow_stack->length() == 0;
1008N/A}
1008N/A
1008N/Avoid RegionTaskQueueWithOverflow::initialize() {
1008N/A _region_queue.initialize();
1008N/A assert(_overflow_stack == 0, "Creating memory leak");
1008N/A _overflow_stack =
1008N/A new (ResourceObj::C_HEAP) GrowableArray<RegionTask>(10, true);
1008N/A}
1008N/A
1008N/Avoid RegionTaskQueueWithOverflow::save(RegionTask t) {
1008N/A if (TraceRegionTasksQueuing && Verbose) {
1008N/A gclog_or_tty->print_cr("CTQ: save " PTR_FORMAT, t);
1008N/A }
1008N/A if(!_region_queue.push(t)) {
1008N/A _overflow_stack->push(t);
1008N/A }
1008N/A}
1008N/A
1008N/A// Note that using this method will retrieve all regions
1008N/A// that have been saved but that it will always check
1008N/A// the overflow stack. It may be more efficient to
1008N/A// check the stealable queue and the overflow stack
1008N/A// separately.
1008N/Abool RegionTaskQueueWithOverflow::retrieve(RegionTask& region_task) {
1008N/A bool result = retrieve_from_overflow(region_task);
1008N/A if (!result) {
1008N/A result = retrieve_from_stealable_queue(region_task);
1008N/A }
1008N/A if (TraceRegionTasksQueuing && Verbose && result) {
1008N/A gclog_or_tty->print_cr(" CTQ: retrieve " PTR_FORMAT, result);
1008N/A }
1008N/A return result;
1008N/A}
1008N/A
1008N/Abool RegionTaskQueueWithOverflow::retrieve_from_stealable_queue(
1008N/A RegionTask& region_task) {
1008N/A bool result = _region_queue.pop_local(region_task);
1008N/A if (TraceRegionTasksQueuing && Verbose) {
1008N/A gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, region_task);
1008N/A }
1008N/A return result;
1008N/A}
1008N/A
1008N/Abool
1008N/ARegionTaskQueueWithOverflow::retrieve_from_overflow(RegionTask& region_task) {
1008N/A bool result;
1008N/A if (!_overflow_stack->is_empty()) {
1008N/A region_task = _overflow_stack->pop();
1008N/A result = true;
1008N/A } else {
1008N/A region_task = (RegionTask) NULL;
1008N/A result = false;
1008N/A }
1008N/A if (TraceRegionTasksQueuing && Verbose) {
1008N/A gclog_or_tty->print_cr("CTQ: retrieve_stealable " PTR_FORMAT, region_task);
1008N/A }
1008N/A return result;
1008N/A}
1008N/A