schedulerbase.py revision cf22150eaeeb72431bf1cf65c309a431454fb22b
# -*- coding: utf-8 -*-
# $Id$
# pylint: disable=C0302
"""
Test Manager - Base class and utilities for the schedulers.
"""
__copyright__ = \
"""
Copyright (C) 2012-2014 Oracle Corporation
This file is part of VirtualBox Open Source Edition (OSE), as
available from http://www.virtualbox.org. This file is free software;
General Public License (GPL) as published by the Free Software
Foundation, in version 2 as it comes in the "COPYING" file of the
VirtualBox OSE distribution. VirtualBox OSE is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
The contents of this file may alternatively be used under the terms
of the Common Development and Distribution License Version 1.0
(CDDL) only, as it comes in the "COPYING.CDDL" file of the
VirtualBox OSE distribution, in which case the provisions of the
CDDL are applicable instead of those of the GPL.
You may elect to license modified versions of this file under the
terms and conditions of either the GPL or the CDDL or both.
"""
__version__ = "$Revision$"
# Standard python imports.
import unittest;
# Validation Kit imports.
from testmanager import config;
class ReCreateQueueData(object):
"""
Data object for recreating a scheduling queue.
It's mostly a storage object, but has a few data checking operations
associated with it.
"""
#
# Load data from the database.
#
# Will extend the entries with aoTestCases and dTestCases members
# further down. checkForGroupDepCycles will add aidTestGroupPreReqs.
# aoTestCases entries are TestCaseData instance with iSchedPriority
# and idTestGroup added for our purposes.
# We will add oTestGroup and aoArgsVariations members to each further down.
# Load dependencies.
# aoTestCases entries are TestCaseArgsData instance with iSchedPriority
# and idTestGroup added for our purposes.
# We will add oTestGroup and oTestCase members to each further down.
#
# Generate global lookups.
#
# Generate a testcase lookup dictionary for use when working on
# argument variations.
# Generate a testgroup lookup dictionary.
#
# Associate extra members with the base data.
#
# Prep the test groups.
# Link testcases to their group, both directions. Prep testcases for
# argument variation association.
# Associate testcase argument variations with their testcases (group)
# in both directions.
if oTestCase.idTestCase != oArgVariation.idTestCase or oTestCase.idTestGroup != oArgVariation.idTestGroup:
else:
# done.
""" Returns a chain of IDs error entry. """
return aoErrors;
def checkForGroupDepCycles(self):
"""
Checks for testgroup dependency cycles and any missing testgroup
dependencies.
Returns array of errors (see SchedulerBase.recreateQueue()).
"""
# Groups without a prerequisite need no checking.
if idPreReq is None:
continue;
# Follow the prerequisite chain until it ends or an error is found.
while idPreReq is not None:
'TestGroup #%s prerequisite chain is too long!'
% (oTestGroup.idTestGroup,));
break;
if oDep is None:
'TestGroup #%s prerequisite #%s is not in the scheduling group!'
break;
return aoErrors;
"""
Checks that testcase dependencies stay within bounds. We do not allow
dependencies outside a testgroup, no dependency cycles or even remotely
long dependency chains.
Returns array of errors (see SchedulerBase.recreateQueue()).
"""
continue;
# Stupid recursion code using special stack(s).
else:
if oDep is None:
'TestCase #%s prerequisite #%s is not in the scheduling group!'
'TestCase #%s prerequisite #%s creates a cycle!'
pass;
else:
return aoErrors;
def deepTestGroupSort(self):
"""
Sorts the testgroups and their testcases by priority and dependencies.
Note! Don't call this before checking for dependency cycles!
"""
return;
#
# ASSUMES groups as well as testcases are sorted by priority by the
# database. So we only have to concern ourselves with the dependency
# sorting.
#
raise TMExceptionBase('Incorrectly sorted testgroups returned by database.');
raise TMExceptionBase('Incorrectly sorted testcases returned by database.');
#
# Sort the testgroups by dependencies.
#
i = 0;
if oTestGroup.idTestGroupPreReq is not None:
if iPreReq > i:
# The prerequisite is after the current entry. Move the
# current entry so that it follows its prereq entry.
continue;
assert iPreReq < i;
i += 1; # Advance.
#
# Sort the testcases by dependencies.
# Same algorithm as above, just more prerequisites.
#
i = 0;
if iPreReq > i:
# The prerequisite is after the current entry. Move the
# current entry so that it follows its prereq entry.
i -= 1; # Don't advance.
break;
assert iPreReq < i;
i += 1; # Advance.
return True;
class SchedQueueData(ModelDataBase):
"""
Scheduling queue data item.
"""
ksIdAttr = 'idSchedGroup';
ksParam_idSchedGroup = 'SchedQueueData_idSchedGroup';
ksParam_idItem = 'SchedQueueData_idItem';
ksParam_offQueue = 'SchedQueueData_offQueue';
ksParam_idGenTestCaseArgs = 'SchedQueueData_idGenTestCaseArgs';
ksParam_idTestGroup = 'SchedQueueData_idTestGroup';
ksParam_aidTestGroupPreReqs = 'SchedQueueData_aidTestGroupPreReqs';
ksParam_bmHourlySchedule = 'SchedQueueData_bmHourlySchedule';
ksParam_tsConfig = 'SchedQueueData_tsConfig';
ksParam_tsLastScheduled = 'SchedQueueData_tsLastScheduled';
ksParam_idTestSetGangLeader = 'SchedQueueData_idTestSetGangLeader';
ksParam_cMissingGangMembers = 'SchedQueueData_cMissingGangMembers';
kasAllowNullAttributes = [ 'idItem', 'offQueue', 'aidTestGroupPreReqs', 'bmHourlySchedule', 'idTestSetGangLeader',
'tsConfig', 'tsLastScheduled' ];
#
# Initialize with defaults.
# See the database for explanations of each of these fields.
#
self.idSchedGroup = None;
self.idGenTestCaseArgs = None;
self.idTestGroup = None;
self.aidTestGroupPreReqs = None;
self.bmHourlySchedule = None;
self.tsLastScheduled = None;
self.idTestSetGangLeader = None;
def initFromValues(self, idSchedGroup, idGenTestCaseArgs, idTestGroup, aidTestGroupPreReqs, # pylint: disable=R0913
idItem = None, offQueue = None, tsConfig = None, tsLastScheduled = None, idTestSetGangLeader = None):
"""
Reinitialize with all attributes potentially given as inputs.
The first four arguments are required, the remaining ones default to None.
Returns self.
"""
return self;
"""
Initialize from database row (SELECT * FROM SchedQueues).
Returns self.
Raises exception if no row is specified.
"""
if aoRow is None:
raise TMExceptionBase('SchedQueueData not found.');
return self;
class SchedulerBase(object):
"""
The scheduler base class.
The scheduler classes have two functions:
1. Recreate the scheduling queue.
2. Pick the next task from the queue.
The first is scheduler specific, the latter isn't.
"""
class BuildCache(object):
""" Build cache. """
class BuildCacheIterator(object):
""" Build class iterator. """
"""Returns self, required by the language."""
return self;
"""Returns the next build, raises StopIteration when the end has been reached."""
while True:
if oEntry is None:
raise StopIteration;
else:
return oEntry;
# end
class BuildCacheEntry(object):
""" Build cache entry. """
"""
Marks the cache entry as removed.
This doesn't actually remove it from the cache array, only marks
it as removed. It has no effect on open iterators.
"""
"""
Retrieves a cached prerequisite decision.
Returns boolean if found, None if not.
"""
"""
Caches a prerequisite decision.
"""
return fDecision;
""" Checks if the build is blacklisted. """
if self._fBlacklisted is None:
return self._fBlacklisted;
""" Configures the build cursor for the cache. """
return True;
"""Return an iterator."""
def fetchFromCursor(self):
"""
Fetches a build from the cursor and adds it to the cache.
Returns the new cache entry, or None if nothing could be fetched.
"""
return None;
try:
except:
return None;
# No row available, nothing to add to the cache.
if aoRow is None:
return None;
return oEntry;
self._asMessages = [];
"""
Instantiate the scheduler specified by the scheduling group.
Returns scheduler child class instance. May raise exception if
the input is invalid.
"""
else:
return oScheduler;
#
# Misc.
#
"""Debug printing."""
return None;
"""Info printing."""
return None;
"""Prints a debug message to the srv glue log (see config.py). """
return None;
def getElapsedSecs(self):
""" Returns the number of seconds this scheduling task has been running. """
#
# Create schedule.
#
"""
Cancels all pending gang gatherings on the current queue.
"""
'FROM SchedQueues\n'
'WHERE idSchedGroup = %s\n'
' AND idTestSetGangLeader is not NULL\n'
return True;
"""
Returns an array of queue items (SchedQueueData).
Child classes must override this.
"""
_ = oData;
return [];
def recreateQueueWorker(self):
"""
Worker for recreateQueue.
"""
#
# Collect the necessary data and validate it.
#
#
# The creation of the scheduling queue is done by the child class.
#
# We will try to guess where in the queue we're currently at and rotate
# the items such that we will resume execution at approximately the
# same position. The goal of the scheduler is to provide a 100%
# deterministic result so that if we regenerate the queue when there
# are no changes to the testcases, testgroups or scheduling groups
# involved, test execution will be unchanged (save for maybe just a
# little for gang gathering).
#
self._oDb.execute('SELECT offQueue FROM SchedQueues WHERE idSchedGroup = %s ORDER BY idItem LIMIT 1'
if offQueueNew != 0:
#
# Replace the scheduling queue.
# gathering state since these use the queue to set up the date.
#
self._oDb.execute('DELETE FROM SchedQueues WHERE idSchedGroup = %s\n', (self._oSchedGrpData.idSchedGroup,));
' idSchedGroup,\n'
' offQueue,\n'
' idGenTestCaseArgs,\n'
' idTestGroup,\n'
' aidTestGroupPreReqs,\n'
' bmHourlySchedule,\n'
' cMissingGangMembers )\n'
'VALUES ( %s, %s, %s, %s, %s, %s, %s )\n'
, ( oItem.idSchedGroup,
));
"""
(Re-)creates the scheduling queue for the given group.
Returns (aoErrors, asMessages). On success the array with the errors
will be empty, on failure it will contain (sError, oRelatedObject)
entries. The messages are for debugging and are simple strings.
Raises exception on database error.
"""
aoErrors = [];
asMessages = [];
try:
#
# To avoid concurrency issues (SchedQueues) and inconsistent data (*),
# we lock quite a few tables while doing this work. We access more
# data than scheduleNewTask so we lock some additional tables.
#
oDb.execute('LOCK TABLE SchedGroups, SchedGroupMembers, TestGroups, TestGroupMembers IN SHARE MODE');
#
# Instantiate the scheduler and call the worker function.
#
else:
except:
raise;
return (aoErrors, asMessages);
#
# Schedule Task.
#
"""
Composes the gang specific testdriver arguments.
Returns command line string, including a leading space.
"""
return sArgs;
def composeExecResponseWorker(self, idTestSet, oTestEx, oTestBox, oBuild, oValidationKitBuild, sBaseUrl):
"""
Given all the bits of data, compose an EXEC command response to the testbox.
Returns the response dictionary.
"""
# A validation kit build is mandatory at this point.
assert oValidationKitBuild;
if sScriptZips is None:
else:
# Expand the download base URL placeholder using the caller supplied
# base URL and the relative download path from the configuration.
sScriptZips = sScriptZips.replace('@DOWNLOAD_BASE_URL@', sBaseUrl + config.g_ksTmDownloadBaseUrlRel);
# The timeout set on oTestEx takes precedence, the one of its testcase is the fallback.
cSecTimeout = oTestEx.cSecTimeout if oTestEx.cSecTimeout is not None else oTestEx.oTestCase.cSecTimeout;
dResponse = \
{
};
return dResponse;
"""
Composes an EXEC response for a gang member (other than the last).
"""
#
# Gather the necessary data.
#
oValidationKitBuild = None;
if oTestSet.idBuildTestSuite is not None:
#
# Instantiate the specified scheduler and let it do the rest.
#
assert oSchedGrpData.idBuildSrc is not None;
return oScheduler.composeExecResponseWorker(idTestSet, oTestEx, oTestBox, oBuild, oValidationKitBuild, sBaseUrl);
"""
Updates a gang schedule task.
"""
assert oTask.idTestSetGangLeader is not None;
if tsNow is not None:
' SET idTestSetGangLeader = %s,\n'
' cMissingGangMembers = %s,\n'
' tsLastScheduled = %s\n'
'WHERE idItem = %s\n'
else:
' SET cMissingGangMembers = %s\n'
'WHERE idItem = %s\n'
return True;
"""
The task has been scheduled successfully, reset its data and move it to
the end of the queue.
"""
if cGangMembers > 1:
' SET idItem = NEXTVAL(\'SchedQueueItemIdSeq\'),\n'
' idTestSetGangLeader = NULL,\n'
' cMissingGangMembers = %s\n'
'WHERE idItem = %s\n'
else:
' SET idItem = NEXTVAL(\'SchedQueueItemIdSeq\'),\n'
' idTestSetGangLeader = NULL,\n'
' cMissingGangMembers = 1,\n'
' tsLastScheduled = %s\n'
'WHERE idItem = %s\n'
return True;
"""
Creates a test set using the given data.
Will not commit, someone up the callstack will do that later on.
Returns the test set ID, may raise an exception on database error.
"""
# Lazy bird doesn't want to write testset.py and does it all here.
#
# We're getting the TestSet ID first in order to include it in the base
# file name (that way we can directly relate files on the disk to the
# test set when doing batch work), and also for idTestSetGangLeader.
#
sBaseFilename = '%04d/%02d/%02d/%02d/TestSet-%s' \
#
# Gang scheduling parameters. Changes the oTask data for updating by caller.
#
iGangMemberNo = 0;
assert oTask.idTestSetGangLeader is None;
elif oTask.idTestSetGangLeader is None:
else:
#
# Do the database stuff.
#
' idTestSet,\n'
' tsConfig,\n'
' tsCreated,\n'
' idBuild,\n'
' idBuildCategory,\n'
' idBuildTestSuite,\n'
' idGenTestBox,\n'
' idTestBox,\n'
' idTestGroup,\n'
' idGenTestCase,\n'
' idTestCase,\n'
' idGenTestCaseArgs,\n'
' idTestCaseArgs,\n'
' sBaseFilename,\n'
' iGangMemberNo,\n'
' idTestSetGangLeader )\n'
'VALUES ( %s,\n' # idTestSet
' %s,\n' # tsConfig
' %s,\n' # tsCreated
' %s,\n' # idBuild
' %s,\n' # idBuildCategory
' %s,\n' # idBuildTestSuite
' %s,\n' # idGenTestBox
' %s,\n' # idTestBox
' %s,\n' # idTestGroup
' %s,\n' # idGenTestCase
' %s,\n' # idTestCase
' %s,\n' # idGenTestCaseArgs
' %s,\n' # idTestCaseArgs
' %s,\n' # sBaseFilename
' %s,\n' # iGangMemberNo
' %s)\n' # idTestSetGangLeader
, ( idTestSet,
));
' idTestResultParent,\n'
' idTestSet,\n'
' tsCreated,\n'
' idStrName,\n'
' cErrors,\n'
' enmStatus,\n'
' iNestingDepth)\n'
'VALUES ( NULL,\n' # idTestResultParent
' %s,\n' # idTestSet
' %s,\n' # tsCreated
' 0,\n' # idStrName
' 0,\n' # cErrors
' \'running\'::TestStatus_T,\n'
' 0)\n' # iNestingDepth
'RETURNING idTestResult'
' SET idTestResult = %s\n'
'WHERE idTestSet = %s\n'
, (idTestResult, idTestSet, ));
return idTestSet;
"""
Tries to find the most recent validation kit build suitable for the given testbox.
Returns BuildDataEx or None. Raise exception on database error.
Can be overridden by child classes to change the default build requirements.
"""
oBuildSource = BuildSourceData().initFromDbWithId(self._oDb, self._oSchedGrpData.idBuildSrcTestSuite, tsNow);
oCursor = BuildSourceLogic(self._oDb).openBuildCursor(oBuildSource, oTestBoxData.sOs, oTestBoxData.sCpuArch, tsNow);
return oBuild;
return None;
"""
Tries to find a fitting build.
Returns BuildDataEx or None. Raise exception on database error.
Can be overridden by child classes to change the default build requirements.
"""
#
# Gather the set of prerequisites we have and turn them into a value
# set for use in the loop below.
#
# Note! We're scheduling on testcase level and ignoring argument variation
# selections in TestGroupMembers is intentional.
#
dPreReqs = {};
# Direct prerequisites. We assume they're all enabled as this can be
# checked at queue creation time.
# Testgroup dependencies from the scheduling group config.
if oTask.aidTestGroupPreReqs is not None:
# Make sure the _active_ test group members are in the cache.
'FROM TestGroupMembers, TestCases\n'
'WHERE TestGroupMembers.idTestGroup = %s\n'
' AND TestGroupMembers.tsExpire > %s\n'
' AND TestGroupMembers.tsEffective <= %s\n'
' AND TestCases.idTestCase = TestGroupMembers.idTestCase\n'
' AND TestCases.tsExpire > %s\n'
' AND TestCases.tsEffective <= %s\n'
' AND TestCases.fEnabled is TRUE\n'
aidTestCases = [];
# Add the testgroup members to the prerequisites.
# Create a SQL values table out of them.
sPreReqSet = ''
#
# Try the builds.
#
self.oBuildCache.setupSource(self._oDb, self._oSchedGrpData.idBuildSrc, oTestBoxData.sOs, oTestBoxData.sCpuArch, tsNow);
#
# Check build requirements set by the test.
#
continue;
continue;
#
# Check prerequisites. The default scheduler is satisfied if one
# argument variation has been executed successfully. It is not
# satisfied if there are any failure runs.
#
if fDecision is None:
## @todo DB Tuning
# Check for missing prereqs.
'LEFT OUTER JOIN (SELECT *\n'
' FROM TestSets\n'
' WHERE enmStatus IN (%s, %s)\n'
' AND idBuild = %s\n'
' ) AS TestSets\n'
' ON (PreReqs.idTestCase = TestSets.idTestCase)\n'
'WHERE TestSets.idTestSet is NULL\n'
if cMissingPreReqs > 0:
continue;
# Check for failed prereq runs.
' TestSets\n'
'WHERE PreReqs.idTestCase = TestSets.idTestCase\n'
' AND TestSets.idBuild = %s\n'
' AND TestSets.enmStatus IN (%s, %s, %s)\n'
)
);
if cFailedPreReqs > 0:
continue;
elif not fDecision:
continue;
#
# If we can, check if the build files still exist.
#
continue;
return None;
"""
Tries to find a matching build for gang scheduling.
Returns BuildDataEx or None. Raise exception on database error.
Can be overridden by child classes to change the default build requirements.
"""
#
# Note! Should probably check build prerequisites if we get a different
# build back, so that we don't use a build which hasn't passed
# the smoke test.
#
_ = idBuildSrc;
return BuildLogic(self._oDb).tryFindSameBuildForOsArch(oLeaderBuild, oTestBoxData.sOs, oTestBoxData.sCpuArch);
"""
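Try to schedule the task as a gang leader (can be a gang of one).
Returns response or None. May raise exception on DB error.
NOTE(review): the global resource allocation failure path returns False
rather than None - confirm that callers handle this.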
"""
# We don't wait for busy resources, we just try the next test.
return None;
#
# Find a matching build (this is the difficult bit).
#
if oBuild is None:
return None;
if oValidationKitBuild is None:
return None;
else:
oValidationKitBuild = None;
#
# Create a testset, allocate the resources and update the state.
# Note! Since resource allocation may still fail, we create a nested
# transaction so we can roll back. (Heed lock warning in docs!)
#
if GlobalResourceLogic(self._oDb).allocateResources(oTestBoxData.idTestBox, oTestEx.aoGlobalRsrc, fCommit = False) \
is not True:
return False;
# We're alone, put the task back at the end of the queue and issue EXEC cmd.
dResponse = self.composeExecResponseWorker(idTestSet, oTestEx, oTestBoxData, oBuild, oValidationKitBuild, sBaseUrl);
else:
# We're missing gang members, issue WAIT cmd.
TestBoxStatusLogic(self._oDb).updateState(oTestBoxData.idTestBox, sTBState, idTestSet, fCommit = False);
return dResponse;
"""
Try to schedule the task as a gang member.
Returns response or None. May raise exception on DB error.
"""
#
# The leader has chosen a build, we need to find a matching one for our platform.
# (It's up to the scheduler to decide upon how strict dependencies are to be enforced
# upon subordinate group members.)
#
if oBuild is None:
return None;
oValidationKitBuild = None;
if oLeaderTestSet.idBuildTestSuite is not None:
oLeaderValidationKitBit = BuildDataEx().initFromDbWithId(self._oDb, oLeaderTestSet.idBuildTestSuite);
#
# Create a testset and update the state(s).
#
# The whole gang is there, move the task to the end of the queue
# and update the status on the other gang members.
dResponse = self.composeExecResponseWorker(idTestSet, oTestEx, oTestBoxData, oBuild, oValidationKitBuild, sBaseUrl);
else:
# We're still missing some gang members, issue WAIT cmd.
return dResponse;
"""
Worker for scheduling a new task.
"""
#
# Iterate the scheduler queue (fetch all to avoid having two concurrent
# queries), trying out each task to see if the testbox can execute it.
#
dRejected = {}; # variations we've already checked out and rejected.
'FROM SchedQueues\n'
'WHERE idSchedGroup = %s\n'
' AND ( bmHourlySchedule IS NULL\n'
' OR get_bit(bmHourlySchedule, %s) = 1 )\n'
'ORDER BY idItem ASC\n'
# Don't loop forever.
break;
# Unpack the data and check if we've rejected the testcasevar/group variation already (they repeat).
self.dprint('** Considering: idItem=%s idGenTestCaseArgs=%s idTestGroup=%s Deps=%s last=%s cfg=%s\n'
continue;
# Fetch all the test case info (too much, but who cares right now).
self.dprint('TestCase "%s": %s %s' % (oTestEx.oTestCase.sName, oTestEx.oTestCase.sBaseCmd, oTestEx.sArgs,));
# This shouldn't happen, but just in case it does...
continue;
# Check if the testbox properties matches the test.
continue;
# Try schedule it.
else:
dResponse = None; # Shouldn't happen!
if dResponse is not None:
return dResponse;
# Found no suitable task.
return None;
"""
Schedules a new task.
"""
try:
#
# To avoid concurrency issues in SchedQueues we lock all the rows
# related to our scheduling queue. Also, since this is a very
# expensive operation we lock the testbox status row to fend off
# repeated retries by a faulty testbox script.
#
% (oTestBoxData.idTestBox,));
% (oTestBoxData.idSchedGroup,));
# We need the current timestamp.
# Re-read the testbox data ...
if oTestBoxDataCur.fEnabled \
# ... and schedule group data.
#
# Instantiate the specified scheduler and let it do the rest.
#
if dResponse is not None:
return dResponse;
except:
raise;
# Not enabled, rollback and return no task.
return None;
"""
Try canceling a gang gathering.
Returns True if successfully cancelled.
Returns False if not (someone raced us to the SchedQueue table).
Note! oStatusData is re-initialized.
"""
try:
#
# Lock the tables we're updating so we don't run into concurrency
# issues (we're racing both scheduleNewTask and other callers of
# this method).
#
#
# Re-read the testbox data and check that we're still in the same state.
#
#
# Get the leader thru the test set and change the state of the whole gang.
#
#
# Move the scheduling queue item to the end.
#
'FROM SchedQueues\n'
'WHERE idTestSetGangLeader = %s\n'
, (oTestSetData.idTestSetGangLeader,) );
' SET idItem = NEXTVAL(\'SchedQueueItemIdSeq\'),\n'
' idTestSetGangLeader = NULL,\n'
' cMissingGangMembers = %s\n'
'WHERE idItem = %s\n'
return True;
return True;
except:
raise;
# Not enabled, rollback and return no task.
return False;
#
# Unit testing.
#
# pylint: disable=C0111
if __name__ == '__main__':
# not reached.