Canary analysis with prometheus: Error occurred during task metricSetMixer

#1

When I try to run a canary pipeline (Prometheus), I get an error:

The Kayenta log shows:

2019-04-16 08:10:33.050  WARN 1 --- [    handlers-20] c.n.s.o.e.DefaultExceptionHandler        : Error occurred during task metricSetMixer
 java.lang.IllegalArgumentException: -25923367
	at java.util.stream.DoublePipeline.limit(DoublePipeline.java:338) ~[na:1.8.0_191]
	at com.netflix.kayenta.metrics.MetricSetMixerService.makeTemplate(MetricSetMixerService.java:153) ~[kayenta-core.jar:na]
	at com.netflix.kayenta.metrics.MetricSetMixerService.mixOneMetric(MetricSetMixerService.java:138) ~[kayenta-core.jar:na]
	at com.netflix.kayenta.metrics.MetricSetMixerService.mixAll(MetricSetMixerService.java:197) ~[kayenta-core.jar:na]
	at com.netflix.kayenta.metrics.orca.MetricSetMixerServiceTask.execute(MetricSetMixerServiceTask.java:110) ~[kayenta-core.jar:na]
	at com.netflix.spinnaker.orca.q.handler.RunTaskHandler$handle$1$2$1.invoke(RunTaskHandler.kt:100) ~[orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.RunTaskHandler$handle$1$2$1.invoke(RunTaskHandler.kt:56) ~[orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.RunTaskHandler.withLoggingContext(RunTaskHandler.kt:316) [orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.RunTaskHandler.access$withLoggingContext(RunTaskHandler.kt:56) [orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.RunTaskHandler$handle$1$2.invoke(RunTaskHandler.kt:99) ~[orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.RunTaskHandler$handle$1$2.invoke(RunTaskHandler.kt:56) ~[orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.AuthenticationAware$sam$java_util_concurrent_Callable$0.call(AuthenticationAware.kt) ~[orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.security.AuthenticatedRequest.lambda$propagate$0(AuthenticatedRequest.java:97) ~[kork-security-3.9.2.jar:3.9.2]
	at com.netflix.spinnaker.orca.q.handler.AuthenticationAware$DefaultImpls.withAuth(AuthenticationAware.kt:49) ~[orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.RunTaskHandler.withAuth(RunTaskHandler.kt:56) [orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.RunTaskHandler$handle$1.invoke(RunTaskHandler.kt:98) [orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.RunTaskHandler$handle$1.invoke(RunTaskHandler.kt:56) [orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.RunTaskHandler$withTask$1.invoke(RunTaskHandler.kt:182) [orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.RunTaskHandler$withTask$1.invoke(RunTaskHandler.kt:56) [orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.OrcaMessageHandler$withTask$1.invoke(OrcaMessageHandler.kt:49) [orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.OrcaMessageHandler$withTask$1.invoke(OrcaMessageHandler.kt:33) [orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.OrcaMessageHandler$withStage$1.invoke(OrcaMessageHandler.kt:59) [orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.OrcaMessageHandler$withStage$1.invoke(OrcaMessageHandler.kt:33) [orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.OrcaMessageHandler$DefaultImpls.withExecution(OrcaMessageHandler.kt:68) [orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.RunTaskHandler.withExecution(RunTaskHandler.kt:56) [orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.OrcaMessageHandler$DefaultImpls.withStage(OrcaMessageHandler.kt:55) [orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.RunTaskHandler.withStage(RunTaskHandler.kt:56) [orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.OrcaMessageHandler$DefaultImpls.withTask(OrcaMessageHandler.kt:42) [orca-queue-6.139.0.jar:6.139.0]
	at com.netflix.spinnaker.orca.q.handler.RunTaskHandler.withTask(RunTaskHandler.kt:56) [orca-queue-6.139.0.jar:6.139.0]

Then I searched the Kayenta source code. The relevant code is:

  List<Double> values = DoubleStream
      .generate(() -> Double.NaN)
      .limit(template.expectedDataPoints())
      .boxed()
      .collect(Collectors.toList());

  public long expectedDataPoints() {
    if (stepMillis == 0) {
      return 0;
    }
    log.info("endTimeMillis is {} ,startTimeMillis is {}.", endTimeMillis, startTimeMillis);
    return (endTimeMillis - startTimeMillis) / stepMillis;
  }

The error is thrown when .limit() is called with a negative value.
startTimeMillis=1555402049702 step:60000
(0 - 1555402049702) / 60000 = -25923367, which is exactly the value reported in the exception.
So I guess endTimeMillis must be 0.

The Prometheus API queries and their results are:

(1):/api/v1/query_range?query=avg(requests{http_code = "500" , kubernetes_pod_name =~"^sampleapp-canary.*"})&start=2019-04-16T08:07:29.702Z&end=2019-04-16T08:10:29.702Z&step=60 
result is:
{
	"status": "success",
	"data": {
		"resultType": "matrix",
		"result": [{
			"metric": {},
			"values": [
				[1555402049.702, "44145"],
				[1555402109.702, "44579"],
				[1555402169.702, "44997"],
				[1555402229.702, "45392"]
			]
		}]
	}
}
(2)
/api/v1/query_range?query=avg(requests{http_code = "500" , kubernetes_pod_name =~"^sampleapp-baseline.*"})&start=2019-04-16T08:07:29.702Z&end=2019-04-16T08:10:29.702Z&step=60
{
	"status": "success",
	"data": {
		"resultType": "matrix",
		"result": [{
			"metric": {},
			"values": [
				[1555402049.702, "833775"],
				[1555402109.702, "834308"],
				[1555402169.702, "834820"],
				[1555402229.702, "835328"]
			]
		}]
	}
}

Some configs:
1. The canary config is:

canary:
    enabled: true
    serviceIntegrations:
    - name: google
      enabled: false
      accounts: []
      gcsEnabled: false
      stackdriverEnabled: false
    - name: prometheus
      enabled: true
      accounts:
      - name: my-prometheus
        endpoint:
          baseUrl: http://cce-monitor-clusterip.kube-system:9090
        supportedTypes:
        - METRICS_STORE
    - name: datadog
      enabled: false
      accounts: []
    - name: signalfx
      enabled: false
      accounts: []
    - name: aws
      enabled: true
      accounts:
      - name: test001
        bucket: spin-0bc2e08a-7816-416b-bcf5-3089bfafe620
        rootFolder: kayenta
        endpoint: http://182.61.16.146:9000
        accessKeyId: miaotest
        secretAccessKey: miaotest123
        supportedTypes:
        - CONFIGURATION_STORE
        - OBJECT_STORE
      s3Enabled: true
    reduxLoggerEnabled: true
    defaultMetricsAccount: my-prometheus
    defaultStorageAccount: test001
    defaultJudge: NetflixACAJudge-v1.0
    defaultMetricsStore: prometheus
    stagesEnabled: true
    templatesEnabled: true
    showAllConfigsEnabled: true

2. The pipeline config is:

    {
      "appConfig": {},
      "description": "This pipeline deploys a canary version of the application, and a baseline (identical to production) version.\nIt compares them, and if the canary is OK, it triggers the production deployment pipeline.",
      "executionEngine": "v2",
      "keepWaitingPipelines": false,
      "lastModifiedBy": "anonymous",
      "limitConcurrent": true,
      "parallel": true,
      "stages": [
      // (other stages removed for brevity)
        {
          "analysisType": "realTime",
          "canaryConfig": {
            "beginCanaryAnalysisAfterMins": "0",
            "canaryAnalysisIntervalMins": "3",
            "canaryConfigId": "24913997-a937-4d48-a9e0-52232dfe0b66",
            "lifetimeDuration": "PT0H4M",
            "metricsAccountName": "my-prometheus",
            "scopes": [
              {
                "controlLocation": "default",
                "controlScope": "sampleapp-baseline",
                "experimentLocation": "default",
                "experimentScope": "sampleapp-canary",
                "extendedScopeParams": {},
                "scopeName": "default",
                "step": 60
              }
            ],
            "scoreThresholds": {
              "marginal": "75",
              "pass": "95"
            },
            "storageAccountName": "test001"
          },
          "completeOtherBranchesThenFail": false,
          "continuePipeline": false,
          "failPipeline": true,
          "name": "Canary Analysis",
          "refId": "16",
          "requisiteStageRefIds": [
            "3",
            "4"
          ],
          "type": "kayentaCanary"
        }
      ],
      "triggers": [],
      "updateTs": "1555395155000"
    }

I don’t know what the mistake is. I’ve been thinking about this for days. Does anybody know how to solve this problem?

0 Likes