From: Francis Deslauriers
Date: Mon, 12 Feb 2018 21:32:27 +0000 (-0500)
Subject: jjb: lava: keep track of job exit status and configuration
X-Git-Url: http://git.lttng.org./?a=commitdiff_plain;h=802e75a7c5707f979e17d907fda9821946c2a2dd;p=lttng-ci.git

jjb: lava: keep track of job exit status and configuration

Completely rework the system trigger jobs. Before these changes, a failed
sub-job would make the umbrella job fail once, but the next time the
umbrella job ran it would see no new commits, would not schedule the
sub-job again, and would return success. Failures were thus forgotten
after one day.

We now save the exit status and configuration of every sub-job to the
workspace. This way, we can avoid re-running sub-jobs that have failed in
the past and haven't changed since. An umbrella job containing such a
sub-job will now be directly marked as failed, and will keep that failed
status until all of its sub-jobs succeed.

Signed-off-by: Francis Deslauriers
---

diff --git a/scripts/system-tests/system-trigger.groovy b/scripts/system-tests/system-trigger.groovy
index 6833c2a..db5b1b5 100644
--- a/scripts/system-tests/system-trigger.groovy
+++ b/scripts/system-tests/system-trigger.groovy
@@ -20,7 +20,6 @@ import hudson.model.*
 import java.io.File
 import org.eclipse.jgit.api.Git
 import org.eclipse.jgit.lib.Ref
-import groovy.transform.EqualsAndHashCode

 class InvalidKVersionException extends Exception {
   public InvalidKVersionException(String message) {
@@ -163,56 +162,38 @@ class VanillaKVersion implements Comparable {
   }
 }

-@EqualsAndHashCode(includeFields=true)
-class RunConfiguration {
-  def linuxBranch
-  def linuxTagId
-  def lttngBranch
-  def lttngModulesCommitId
-  def lttngToolsCommitId
-  def lttngUstCommitId
-  RunConfiguration(linuxBranch, linuxTagId, lttngBranch, lttngToolsCommitId,
-                   lttngModulesCommitId, lttngUstCommitId) {
-    this.linuxBranch = linuxBranch
-    this.linuxTagId = linuxTagId
-    this.lttngBranch = lttngBranch
-    this.lttngModulesCommitId = lttngModulesCommitId
-    this.lttngToolsCommitId = lttngToolsCommitId
-    this.lttngUstCommitId = lttngUstCommitId
-  }
-
-  String toString() {
-    return "${this.linuxBranch}:{${this.linuxTagId}}, ${this.lttngBranch}" +
-           ":{${this.lttngModulesCommitId}, ${this.lttngToolsCommitId}," +
-           "${this.lttngUstCommitId}}"
-  }
-}
-
-def LoadPreviousIdsFromWorkspace = { ondiskpath ->
-  def previousIds = []
+// Save the hashmap containing all the jobs and their status to disk. We can do
+// that because this job is configured to always run on the master node on
+// Jenkins.
+def SaveCurrentJobsToWorkspace = { currentJobs, ondiskpath ->
   try {
     File myFile = new File(ondiskpath);
-    def input = new ObjectInputStream(new FileInputStream(ondiskpath))
-    previousIds = input.readObject()
-    input.close()
+    myFile.createNewFile();
+    def out = new ObjectOutputStream(new FileOutputStream(ondiskpath))
+    out.writeObject(currentJobs)
+    out.close()
   } catch (e) {
-    println("Failed to load previous Git object IDs from disk." + e);
+    println("Failed to save the current jobs to disk." + e);
   }
-  return previousIds
 }

-def saveCurrentIdsToWorkspace = { currentIds, ondiskpath ->
+// Load the hashmap containing all the jobs and their last status from disk.
+// It's possible because this job is configured to always run on the master +// node on Jenkins +def LoadPreviousJobsFromWorkspace = { ondiskpath -> + def previousJobs = [:] try { File myFile = new File(ondiskpath); - myFile.createNewFile(); - def out = new ObjectOutputStream(new FileOutputStream(ondiskpath)) - out.writeObject(currentIds) - out.close() + def input = new ObjectInputStream(new FileInputStream(ondiskpath)) + previousJobs = input.readObject() + input.close() } catch (e) { - println("Failed to save previous Git object IDs from disk." + e); + println("Failed to load previous runs from disk." + e); } + return previousJobs } + def GetHeadCommits = { remoteRepo, branchesOfInterest -> def remoteHeads = [:] def remoteHeadRefs = Git.lsRemoteRepository() @@ -282,11 +263,11 @@ def GetLastTagIds = { remoteRepo, branchesOfInterest -> return remoteLastTagCommit } -def CraftJobName = { jobType, runConfig -> - return "${jobType}_k${runConfig.linuxBranch}_l${runConfig.lttngBranch}" +def CraftJobName = { jobType, linuxBranch, lttngBranch -> + return "${jobType}_k${linuxBranch}_l${lttngBranch}" } -def LaunchJob = { jobName, runConfig -> +def LaunchJob = { jobName, jobInfo -> def job = Hudson.instance.getJob(jobName) def params = [] for (paramdef in job.getProperty(ParametersDefinitionProperty.class).getParameterDefinitions()) { @@ -297,10 +278,10 @@ def LaunchJob = { jobName, runConfig -> } } - params.add(new StringParameterValue('LTTNG_TOOLS_COMMIT_ID', runConfig.lttngToolsCommitId)) - params.add(new StringParameterValue('LTTNG_MODULES_COMMIT_ID', runConfig.lttngModulesCommitId)) - params.add(new StringParameterValue('LTTNG_UST_COMMIT_ID', runConfig.lttngUstCommitId)) - params.add(new StringParameterValue('KERNEL_TAG_ID', runConfig.linuxTagId)) + params.add(new StringParameterValue('LTTNG_TOOLS_COMMIT_ID', jobInfo['config']['toolsCommit'])) + params.add(new StringParameterValue('LTTNG_MODULES_COMMIT_ID', jobInfo['config']['modulesCommit'])) + params.add(new StringParameterValue('LTTNG_UST_COMMIT_ID', jobInfo['config']['ustCommit'])) + params.add(new StringParameterValue('KERNEL_TAG_ID', jobInfo['config']['linuxTagID'])) def currBuild = job.scheduleBuild2(0, new Cause.UpstreamCause(build), new ParametersAction(params)) if (currBuild != null ) { @@ -317,10 +298,7 @@ final String modulesRepo = "https://github.com/lttng/lttng-modules.git" final String ustRepo = "https://github.com/lttng/lttng-ust.git" final String linuxRepo = "git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git" -final String toolsOnDiskPath = build.getEnvironment(listener).get('WORKSPACE') + "/on-disk-tools-ref" -final String modulesOnDiskPath = build.getEnvironment(listener).get('WORKSPACE') + "/on-disk-modules-ref" -final String ustOnDiskPath = build.getEnvironment(listener).get('WORKSPACE') + "/on-disk-ust-ref" -final String linuxOnDiskPath = build.getEnvironment(listener).get('WORKSPACE') + "/on-disk-linux-ref" +final String pastJobsPath = build.getEnvironment(listener).get('WORKSPACE') + "/pastjobs"; def recentLttngBranchesOfInterest = ['master', 'stable-2.10', 'stable-2.9'] def recentLinuxBranchesOfInterest = ['master', 'linux-4.9.y', 'linux-4.4.y'] @@ -354,104 +332,20 @@ def ustHeadCommits = GetHeadCommits(ustRepo, lttngBranchesOfInterest) // For Linux branches, we look for new non-RC tags. def linuxLastTagIds = GetLastTagIds(linuxRepo, linuxBranchesOfInterest) -// Load previously built Linux tag ids. 
-println("Loading Git object IDs of previously built projects from the workspace."); -def oldLinuxTags = LoadPreviousIdsFromWorkspace(linuxOnDiskPath) as Set - -// Load previously built LTTng commit ids. -def oldToolsHeadCommits = LoadPreviousIdsFromWorkspace(toolsOnDiskPath) as Set -def oldModulesHeadCommits = LoadPreviousIdsFromWorkspace(modulesOnDiskPath) as Set -def oldUstHeadCommits = LoadPreviousIdsFromWorkspace(ustOnDiskPath) as Set - -def newOldLinuxTags = oldLinuxTags -def newOldToolsHeadCommits = oldToolsHeadCommits -def newOldModulesHeadCommits = oldModulesHeadCommits -def newOldUstHeadCommits = oldUstHeadCommits - -// Canary jobs are run daily to make sure the lava pipeline is working properly. -def canaryRunConfigs = [] as Set -canaryRunConfigs.add( - ['v4.4.9', '1a1a512b983108015ced1e7a7c7775cfeec42d8c', 'v2.8.1','d11e0db', '7fd9215', '514a87f'] as RunConfiguration) - -def runConfigs = [] as Set - -// For each top of branch kernel tags that were not seen before, schedule one -// job for each lttng/linux tracked configurations. -linuxLastTagIds.each { linuxTag -> - if (!oldLinuxTags.contains(linuxTag.value)) { - lttngBranchesOfInterest.each { lttngBranch -> - if (configurationOfInterest.contains([lttngBranch, linuxTag.key])) { - runConfigs.add([linuxTag.key, linuxTag.value, - lttngBranch, toolsHeadCommits[lttngBranch], - modulesHeadCommits[lttngBranch], ustHeadCommits[lttngBranch]] - as RunConfiguration) - - newOldLinuxTags.add(linuxTag.value) - } - } - } +def CraftConfig = { linuxBr, lttngBr -> + def job = [:]; + job['config'] = [:]; + job['config']['linuxBranch'] = linuxBr; + job['config']['lttngBranch'] = lttngBr; + job['config']['linuxTagID'] = linuxLastTagIds[linuxBr]; + job['config']['toolsCommit'] = toolsHeadCommits[lttngBr]; + job['config']['modulesCommit'] = modulesHeadCommits[lttngBr]; + job['config']['ustCommit'] = ustHeadCommits[lttngBr]; + job['status'] = 'NOT_SET'; + job['build'] = null; + return job; } -// For each top of branch commits of LTTng-Tools that were not seen before, -// schedule one job for each lttng/linux tracked configurations -toolsHeadCommits.each { toolsHead -> - if (!oldToolsHeadCommits.contains(toolsHead.value)) { - linuxLastTagIds.each { linuxTag -> - def lttngBranch = toolsHead.key - if (configurationOfInterest.contains([lttngBranch, linuxTag.key])) { - runConfigs.add([linuxTag.key, linuxTag.value, - lttngBranch, toolsHeadCommits[lttngBranch], - modulesHeadCommits[lttngBranch], ustHeadCommits[lttngBranch]] - as RunConfiguration) - - newOldToolsHeadCommits.add(toolsHead.value) - } - } - } -} - -// For each top of branch commits of LTTng-Modules that were not seen before, -// schedule one job for each lttng/linux tracked configurations -modulesHeadCommits.each { modulesHead -> - if (!oldModulesHeadCommits.contains(modulesHead.value)) { - linuxLastTagIds.each { linuxTag -> - def lttngBranch = modulesHead.key - if (configurationOfInterest.contains([lttngBranch, linuxTag.key])) { - runConfigs.add([linuxTag.key, linuxTag.value, - lttngBranch, toolsHeadCommits[lttngBranch], - modulesHeadCommits[lttngBranch], ustHeadCommits[lttngBranch]] - as RunConfiguration) - - newOldModulesHeadCommits.add(modulesHead.value) - } - } - } -} - -// For each top of branch commits of LTTng-UST that were not seen before, -// schedule one job for each lttng/linux tracked configurations -ustHeadCommits.each { ustHead -> - if (!oldUstHeadCommits.contains(ustHead.value)) { - linuxLastTagIds.each { linuxTag -> - def lttngBranch = ustHead.key - if 
(configurationOfInterest.contains([lttngBranch, linuxTag.key])) {
-        runConfigs.add([linuxTag.key, linuxTag.value,
-                        lttngBranch, toolsHeadCommits[lttngBranch],
-                        modulesHeadCommits[lttngBranch], ustHeadCommits[lttngBranch]]
-                        as RunConfiguration)
-
-        newOldUstHeadCommits.add(ustHead.value)
-      }
-    }
-  }
-}
-
-def ongoingBuild = [:]
-def failedRuns = []
-def abortedRuns = []
-def isFailed = false
-def isAborted = false
-
 // Check what type of jobs should be triggered.
 triggerJobName = build.project.getFullDisplayName();
 if (triggerJobName.contains("vm_tests")) {
@@ -462,97 +356,122 @@ if (triggerJobName.contains("vm_tests")) {
   jobType = 'baremetal_benchmarks';
 }

-// Launch regular jobs.
-if (runConfigs.size() > 0) {
-  println("\nSchedule jobs triggered by code changes:");
-  runConfigs.each { config ->
-    def jobName = CraftJobName(jobType, config);
-    def currBuild = LaunchJob(jobName, config);
+// Hashmap containing all the jobs, their configuration (commit id, etc.) and
+// their status (SUCCEEDED, FAILED, etc.). This hashmap is made of basic strings
+// rather than objects and enums because strings are easily serializable.
+def currentJobs = [:];

-    // LaunchJob will return null if the job doesn't exist or is disabled.
-    if (currBuild != null) {
-      ongoingBuild[jobName] = currBuild;
-    }
+// Get an up-to-date view of all the branches of interest.
+configurationOfInterest.each { lttngBr, linuxBr ->
+  def jobName = CraftJobName(jobType, linuxBr, lttngBr);
+  currentJobs[jobName] = CraftConfig(linuxBr, lttngBr);

-    // Jobs to run only on master branchs of both Linux and LTTng.
-    if (config.linuxBranch.contains('master') &&
-        config.lttngBranch.contains('master')) {
-      // vm_tests specific.
-      if (jobType.contains("vm_tests")) {
-        jobName = CraftJobName('vm_tests_fuzzing', config);
-        currBuild = LaunchJob(jobName, config);
-
-        // LaunchJob will return null if the job doesn't exist or is disabled.
-        if (currBuild != null) {
-          ongoingBuild[jobName] = currBuild;
-        }
-      }
-    }
+  // Add fuzzing job in vm_tests on master branches of lttng and linux.
+  if (jobType == 'vm_tests' && lttngBr == 'master' && linuxBr == 'master') {
+    def vmFuzzingJobName = CraftJobName(jobType + '_fuzzing', linuxBr, lttngBr);
+    currentJobs[vmFuzzingJobName] = CraftConfig(linuxBr, lttngBr);
   }
-} else {
-  println("No new commit or tags, nothing more to do.")
 }

-// Launch canary jobs.
-println("\nSchedule canary jobs once a day:")
-canaryRunConfigs.each { config ->
-  def jobName = jobType + '_canary';
-  def currBuild = LaunchJob(jobName, config);
+// Add canary job.
+def jobNameCanary = jobType + "_canary";
+currentJobs[jobNameCanary] = [:];
+currentJobs[jobNameCanary]['config'] = [:];
+currentJobs[jobNameCanary]['config']['linuxBranch'] = 'v4.4.9';
+currentJobs[jobNameCanary]['config']['lttngBranch'] = 'v2.8.1';
+currentJobs[jobNameCanary]['config']['linuxTagID'] = '1a1a512b983108015ced1e7a7c7775cfeec42d8c';
+currentJobs[jobNameCanary]['config']['toolsCommit'] = 'd11e0db';
+currentJobs[jobNameCanary]['config']['modulesCommit'] = '7fd9215';
+currentJobs[jobNameCanary]['config']['ustCommit'] = '514a87f';
+currentJobs[jobNameCanary]['status'] = 'NOT_SET';
+currentJobs[jobNameCanary]['build'] = null;
+
+def pastJobs = LoadPreviousJobsFromWorkspace(pastJobsPath);

-  // LaunchJob will return null if the job doesn't exist or is disabled.
-  if (currBuild != null) {
-    ongoingBuild[jobName] = currBuild;
+def failedRuns = []
+def abortedRuns = []
+def isFailed = false
+def isAborted = false
+def ongoingJobs = 0;
+
+currentJobs.each { jobName, jobInfo ->
+  // If the job ran in the past, we check if the IDs changed since.
+  if (pastJobs.containsKey(jobName) && !jobName.contains('_canary')) {
+    pastJob = pastJobs[jobName];
+    // Have the IDs changed?
+    if (pastJob['config'] == jobInfo['config']) {
+      // If the config has not changed, we keep the old status.
+      // If it failed, we don't launch a new job and keep it failed.
+      jobInfo['status'] = pastJob['status'];
+      if (pastJob['status'] == 'FAILED') {
+        println("${jobName} has not changed since the last failed run. Don't run it again.");
+        // Mark the umbrella job for failure but still run the jobs that changed
+        // since the last run.
+        isFailed = true;
+        return;
+      } else if (pastJob['status'] == 'ABORTED') {
+        println("${jobName} has not changed since the last aborted run. Run it again.");
+      } else if (pastJob['status'] == 'SUCCEEDED') {
+        println("${jobName} has not changed since the last successful run. Don't run it again.");
+        return;
+      }
+    }
   }
+
+  jobInfo['status'] = 'PENDING';
+  jobInfo['build'] = LaunchJob(jobName, jobInfo);
+  ongoingJobs += 1;
 }

-// Save the tag and commit IDs scheduled in the past and during this run to the
-// workspace. We save it at the end to be sure all jobs were launched. We save
-// the object IDs even in case of failure. There is no point of re-running the
-// same job is there are no code changes even in case of failure.
-println("Saving Git object IDs of previously built projects to the workspace.");
-saveCurrentIdsToWorkspace(newOldLinuxTags, linuxOnDiskPath);
-saveCurrentIdsToWorkspace(newOldToolsHeadCommits, toolsOnDiskPath);
-saveCurrentIdsToWorkspace(newOldModulesHeadCommits, modulesOnDiskPath);
-saveCurrentIdsToWorkspace(newOldUstHeadCommits, ustOnDiskPath);
-
-// Iterate over all the running jobs. Record the status of completed jobs.
-while (ongoingBuild.size() > 0) {
-  def ongoingIterator = ongoingBuild.iterator();
-  while (ongoingIterator.hasNext()) {
-    currentBuild = ongoingIterator.next();
-
-    jobName = currentBuild.getKey();
-    job_run = currentBuild.getValue();
+while (ongoingJobs > 0) {
+  currentJobs.each { jobName, jobInfo ->
+
+    if (jobInfo['status'] != 'PENDING') {
+      return;
+    }
+
+    jobBuild = jobInfo['build']

     // The isCancelled() method checks if the run was cancelled before
     // execution. We consider such run as being aborted.
-    if (job_run.isCancelled()) {
+    if (jobBuild.isCancelled()) {
       println("${jobName} was cancelled before launch.")
-      abortedRuns.add(jobName);
       isAborted = true;
-      ongoingIterator.remove();
-    } else if (job_run.isDone()) {
+      abortedRuns.add(jobName);
+      ongoingJobs -= 1;
+      jobInfo['status'] = 'ABORTED'
+      // Invalidate the build field, as it's not serializable and we don't need
+      // it anymore.
+      jobInfo['build'] = null;
+    } else if (jobBuild.isDone()) {
+
+      jobExitStatus = jobBuild.get();

-      job_status = job_run.get();
-      println("${job_status.fullDisplayName} completed with status ${job_status.result}.");
+      // Invalidate the build field, as it's not serializable and we don't need
+      // it anymore.
+      jobInfo['build'] = null;
+      println("${jobExitStatus.fullDisplayName} completed with status ${jobExitStatus.result}.");

       // If the job didn't succeed, add its name to the right list so it can
       // be printed at the end of the execution.
-      switch (job_status.result) {
+      ongoingJobs -= 1;
+      switch (jobExitStatus.result) {
         case Result.ABORTED:
           isAborted = true;
           abortedRuns.add(jobName);
+          jobInfo['status'] = 'ABORTED'
           break;
         case Result.FAILURE:
           isFailed = true;
           failedRuns.add(jobName);
+          jobInfo['status'] = 'FAILED'
           break;
         case Result.SUCCESS:
+          jobInfo['status'] = 'SUCCEEDED'
+          break;
         default:
           break;
       }
-
-      ongoingIterator.remove();
     }
   }

@@ -569,6 +488,9 @@ while (ongoingBuild.size() > 0) {
   }
 }

+// All jobs are done running. Save their exit status to disk.
+SaveCurrentJobsToWorkspace(currentJobs, pastJobsPath);
+
 // Get log of failed runs.
 if (failedRuns.size() > 0) {
   println("Failed job(s):");
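
For reference, the SaveCurrentJobsToWorkspace/LoadPreviousJobsFromWorkspace pair
above reduces to plain Java serialization of a string-only map. The following is
a minimal stand-alone Groovy sketch of that round trip; the closure names and
the 'pastjobs-demo' path are illustrative only, and Groovy's
withObjectOutputStream/withObjectInputStream stand in for the explicit stream
handling and try/catch used in the patch:

    import java.io.File

    // Persist a map of job name -> [config: ..., status: ...] with Java
    // serialization, as the trigger script does on the Jenkins master.
    def saveJobs = { jobs, path ->
        // withObjectOutputStream closes the stream once the closure returns.
        new File(path).withObjectOutputStream { it.writeObject(jobs) }
    }

    def loadJobs = { path ->
        def f = new File(path)
        // First run: no file on disk yet, so start from an empty map.
        f.exists() ? f.withObjectInputStream { it.readObject() } : [:]
    }

    def jobs = ['vm_tests_kmaster_lmaster':
                    [config: [toolsCommit: 'abc1234'], status: 'FAILED']]
    saveJobs(jobs, 'pastjobs-demo')
    assert loadJobs('pastjobs-demo') == jobs

Keeping the map down to basic strings (statuses such as 'FAILED' rather than
enums or Result objects) is what makes it serializable across trigger runs,
which is also why the patch nulls out jobInfo['build'] before saving.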
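
The re-run decision in the currentJobs.each loop can likewise be condensed into
a small predicate. This is a sketch under the same assumptions (shouldRun is a
hypothetical name; the real loop also mutates jobInfo['status'] in place and
marks the umbrella job failed when it skips a failed, unchanged sub-job):

    // Decide whether a sub-job must be (re)launched, given its last recorded run.
    def shouldRun = { jobName, config, pastJobs ->
        def past = pastJobs[jobName]
        if (past == null || past['config'] != config) {
            return true                    // new job, or the commit IDs changed
        }
        switch (past['status']) {
            case 'SUCCEEDED': return false // unchanged and green: skip it
            case 'FAILED':    return false // unchanged and red: skip, umbrella stays failed
            case 'ABORTED':   return true  // aborted runs get another chance
            default:          return true
        }
    }

    def pastJobs = ['vm_tests_kmaster_lmaster':
                        [config: [toolsCommit: 'abc1234'], status: 'FAILED']]
    assert !shouldRun('vm_tests_kmaster_lmaster', [toolsCommit: 'abc1234'], pastJobs)
    assert  shouldRun('vm_tests_kmaster_lmaster', [toolsCommit: 'def5678'], pastJobs)

Canary jobs bypass this check entirely (the !jobName.contains('_canary') guard
in the patch), so they run on every trigger regardless of past results.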