Skip to content

ArgoCD 实现与最佳实践

ArgoCD是一个为Kubernetes设计的声明式GitOps持续交付工具：它持续监控Git仓库中的配置变更，并自动将应用程序状态同步到Kubernetes集群。本文详细介绍ArgoCD的安装配置、核心功能的使用以及企业级最佳实践。

🚀 ArgoCD 安装与配置

基础安装部署

yaml
# Argo CD quick installation. Every key under this mapping stores a
# copy-paste snippet as a literal block scalar, so its text is kept verbatim.
quick_installation:
  # Method 1: official YAML manifest. The snippet creates the argocd
  # namespace, applies install.yaml from the upstream "stable" branch, and
  # waits up to 600s for all pods in that namespace to report Ready.
  official_manifest: |
    # 创建namespace
    kubectl create namespace argocd
    
    # 安装ArgoCD
    kubectl apply -n argocd -f https://raw.githubusercontent.com/argoproj/argo-cd/stable/manifests/install.yaml
    
    # 等待所有Pod就绪
    kubectl wait --for=condition=ready pod --all -n argocd --timeout=600s
  
  # Method 2: Helm chart. The snippet writes argocd-values.yaml through a
  # heredoc and installs the argo/argo-cd chart with those values.
  helm_installation: |
    # Add the Argo CD Helm repository
    helm repo add argo https://argoproj.github.io/argo-helm
    helm repo update

    # Create the values file. The delimiter is quoted ('EOF') so the shell
    # writes the body verbatim instead of expanding $variables or backticks.
    cat > argocd-values.yaml << 'EOF'
    global:
      image:
        tag: v2.8.4

    controller:
      replicas: 1
      resources:
        limits:
          cpu: 500m
          memory: 512Mi
        requests:
          cpu: 250m
          memory: 256Mi

    server:
      replicas: 2
      resources:
        limits:
          cpu: 100m
          memory: 128Mi
        requests:
          cpu: 50m
          memory: 64Mi

      # Ingress for the web UI; TLS is terminated by ingress-nginx
      ingress:
        enabled: true
        ingressClassName: nginx
        annotations:
          nginx.ingress.kubernetes.io/ssl-redirect: "true"
          # argocd-server serves the UI and REST API over HTTPS on this
          # backend. "GRPC" would proxy gRPC traffic only and break the
          # browser UI; CLI users can pass --grpc-web through this Ingress.
          nginx.ingress.kubernetes.io/backend-protocol: "HTTPS"
        hosts:
          - argocd.example.com
        tls:
          - secretName: argocd-server-tls
            hosts:
              - argocd.example.com

    repoServer:
      replicas: 2
      resources:
        limits:
          cpu: 1
          memory: 1Gi
        requests:
          cpu: 100m
          memory: 256Mi

    redis-ha:
      enabled: true
    EOF

    # Install Argo CD
    # NOTE(review): the chart version is not pinned while the image tag is;
    # consider adding --version so chart and image stay compatible.
    helm install argocd argo/argo-cd \
      --namespace argocd \
      --create-namespace \
      --values argocd-values.yaml
  
  # Accessing the web UI: read the generated admin password from the
  # argocd-initial-admin-secret Secret (base64-decoded), then port-forward
  # the argocd-server Service to local port 8080 (development use).
  access_ui: |
    # 获取初始admin密码
    kubectl -n argocd get secret argocd-initial-admin-secret \
      -o jsonpath="{.data.password}" | base64 -d
    
    # 端口转发访问(开发环境)
    kubectl port-forward svc/argocd-server -n argocd 8080:443
    
    # 访问 https://localhost:8080
    # 用户名: admin
    # 密码: 上面获取的密码
yaml
# Production-oriented configuration snippets (HA, RBAC/SSO, multi-cluster).
production_setup:
  # High-availability related settings. Argo CD only reads its well-known
  # ConfigMaps/Secrets, so each setting is placed in the object that owns it:
  # general options in argocd-cm, component flags in argocd-cmd-params-cm,
  # and repository credential templates in labelled Secrets.
  high_availability: |
    # General settings
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: argocd-cm
      namespace: argocd
      labels:
        app.kubernetes.io/part-of: argocd
    data:
      application.instanceLabelKey: argocd.argoproj.io/instance
      server.rbac.log.enforce.enable: "true"

    ---
    # Component parameters: point every component at the Redis HA proxy
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: argocd-cmd-params-cm
      namespace: argocd
      labels:
        app.kubernetes.io/part-of: argocd
    data:
      redis.server: "argocd-redis-ha-haproxy:6379"

    ---
    # Credential template for every repository under https://github.com/myorg.
    # Replaces the legacy `repository.credentials` key of argocd-cm.
    # Values are placeholders; store the real ones with Sealed Secrets or
    # External Secrets rather than committing them to Git.
    apiVersion: v1
    kind: Secret
    metadata:
      name: github-repo-creds
      namespace: argocd
      labels:
        argocd.argoproj.io/secret-type: repo-creds
    type: Opaque
    stringData:
      type: git
      url: https://github.com/myorg
      username: "<github-username>"
      password: "<github-token>"

    ---
    # Credential template for https://gitlab.example.com
    apiVersion: v1
    kind: Secret
    metadata:
      name: gitlab-repo-creds
      namespace: argocd
      labels:
        argocd.argoproj.io/secret-type: repo-creds
    type: Opaque
    stringData:
      type: git
      url: https://gitlab.example.com
      username: "<gitlab-username>"
      password: "<gitlab-token>"

    ---
    # Redis HA configuration
    # NOTE(review): with the Helm install this ConfigMap is normally rendered
    # by the redis-ha sub-chart; prefer chart values over editing it by hand.
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: argocd-redis-ha-configmap
      namespace: argocd
    data:
      redis.conf: |
        dir /data
        port 6379
        bind 0.0.0.0
        maxmemory 256mb
        maxmemory-policy allkeys-lru
        save 900 1
        save 300 10
        save 60 10000
  
  # RBAC and SSO. argocd-rbac-cm carries only RBAC policy; identity-provider
  # settings (oidc.config / dex.config) are read from argocd-cm.
  rbac_configuration: |
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: argocd-rbac-cm
      namespace: argocd
      labels:
        app.kubernetes.io/part-of: argocd
    data:
      # Role applied to authenticated users that match no other rule
      policy.default: role:readonly
      # Token claim(s) matched against the `g, <group>, <role>` lines below
      scopes: '[groups]'
      policy.csv: |
        # Administrator role
        p, role:admin, applications, *, */*, allow
        p, role:admin, clusters, *, *, allow
        p, role:admin, repositories, *, *, allow

        # Developer role
        p, role:developer, applications, get, */*, allow
        p, role:developer, applications, sync, default/*, allow
        p, role:developer, applications, action/*, default/*, allow
        p, role:developer, applications, update, default/*, allow

        # Read-only role
        p, role:readonly, applications, get, */*, allow
        p, role:readonly, clusters, get, *, allow
        p, role:readonly, repositories, get, *, allow

        # Group-to-role mapping
        g, myorg:admins, role:admin
        g, myorg:developers, role:developer
        g, myorg:viewers, role:readonly

    ---
    # SSO settings live in argocd-cm.
    # NOTE(review): Argo CD uses a single SSO source; when oidc.config is set
    # it is expected to take precedence over dex.config. Keep only the block
    # that matches your identity provider.
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: argocd-cm
      namespace: argocd
      labels:
        app.kubernetes.io/part-of: argocd
    data:
      # External URL of Argo CD, required for SSO redirects
      url: https://argocd.example.com

      # Option A: external OIDC provider
      # ($oidc.clientSecret is resolved from the argocd-secret Secret)
      oidc.config: |
        name: OIDC
        issuer: https://auth.example.com
        clientID: argocd
        clientSecret: $oidc.clientSecret
        requestedScopes: ["openid", "profile", "email", "groups"]
        requestedIDTokenClaims: {"groups": {"essential": true}}

      # Option B: LDAP through the bundled Dex (Argo CD has no ldap.config key)
      dex.config: |
        connectors:
        - type: ldap
          id: ldap
          name: LDAP
          config:
            host: ldap.example.com:636
            insecureNoSSL: false
            startTLS: false
            bindDN: cn=argocd,ou=service-accounts,dc=example,dc=com
            bindPW: "$ldap.bindPassword"
            userSearch:
              baseDN: dc=example,dc=com
              filter: "(objectClass=person)"
              username: uid
              idAttr: uid
              emailAttr: mail
              nameAttr: displayName
            groupSearch:
              baseDN: dc=example,dc=com
              filter: "(objectClass=groupOfNames)"
              userMatchers:
              - userAttr: DN
                groupAttr: member
              nameAttr: cn
  
  # Multi-cluster setup: two Secrets labelled
  # argocd.argoproj.io/secret-type: cluster register external clusters
  # (staging and production) by name, API server URL and a JSON connection
  # config. The token and certificate values shown are truncated placeholders.
  multi_cluster_setup: |
    # 添加外部集群
    apiVersion: v1
    kind: Secret
    metadata:
      name: staging-cluster
      namespace: argocd
      labels:
        argocd.argoproj.io/secret-type: cluster
    type: Opaque
    stringData:
      name: staging-cluster
      server: https://staging-k8s.example.com
      config: |
        {
          "bearerToken": "eyJhbGciOiJSUzI1NiIs...",
          "tlsClientConfig": {
            "caData": "LS0tLS1CRUdJTi...",
            "insecure": false
          }
        }
    
    ---
    apiVersion: v1
    kind: Secret
    metadata:
      name: production-cluster
      namespace: argocd
      labels:
        argocd.argoproj.io/secret-type: cluster
    type: Opaque
    stringData:
      name: production-cluster
      server: https://prod-k8s.example.com
      config: |
        {
          "bearerToken": "eyJhbGciOiJSUzI1NiIs...",
          "tlsClientConfig": {
            "caData": "LS0tLS1CRUdJTi...",
            "certData": "LS0tLS1CRUdJTi...",
            "keyData": "LS0tLS1CRUdJTi..."
          }
        }
yaml
# Security-related snippets: TLS material, secret management, network policy.
security_configuration:
  # Certificates and TLS: create a self-signed certificate for
  # argocd.example.com and store it as the argocd-server-tls TLS Secret.
  tls_configuration: |
    # Generate a self-signed certificate. Modern TLS clients (browsers, Go
    # programs such as the argocd CLI) validate the host name against the
    # subjectAltName extension and ignore the CN, so the SAN must be present.
    # (-addext requires OpenSSL 1.1.1 or newer.)
    openssl req -new -newkey rsa:2048 -days 365 -nodes -x509 \
      -subj "/C=US/ST=CA/L=San Francisco/O=MyOrg/CN=argocd.example.com" \
      -addext "subjectAltName=DNS:argocd.example.com" \
      -keyout argocd.key -out argocd.crt

    # Create the TLS Secret
    kubectl create secret tls argocd-server-tls \
      --cert=argocd.crt \
      --key=argocd.key \
      -n argocd
  
  # Secret management: two ways to deliver Argo CD repository Secrets
  # without committing plaintext credentials. A repository Secret must carry
  # `url` (and normally `type`) next to the credentials, otherwise Argo CD
  # cannot associate it with any repository.
  secret_management: |
    # Option 1: Sealed Secrets
    apiVersion: bitnami.com/v1alpha1
    kind: SealedSecret
    metadata:
      name: github-repo-secret
      namespace: argocd
    spec:
      encryptedData:
        # Every value is produced by kubeseal; the strings are placeholders
        type: AgBy3i4OJSWK+PiTySYZZA9rO43cGDEQAx...
        url: AgBy3i4OJSWK+PiTySYZZA9rO43cGDEQAx...
        password: AgBy3i4OJSWK+PiTySYZZA9rO43cGDEQAx...
        username: AgBy3i4OJSWK+PiTySYZZA9rO43cGDEQAx...
      template:
        metadata:
          name: github-repo-secret
          labels:
            argocd.argoproj.io/secret-type: repository
        type: Opaque

    ---
    # Option 2: External Secrets Operator
    apiVersion: external-secrets.io/v1beta1
    kind: ExternalSecret
    metadata:
      name: gitlab-repo-secret
      namespace: argocd
    spec:
      refreshInterval: 1h
      secretStoreRef:
        name: vault-secret-store
        kind: SecretStore
      target:
        name: gitlab-repo-secret
        creationPolicy: Owner
        template:
          metadata:
            labels:
              argocd.argoproj.io/secret-type: repository
          data:
            # Static, non-secret fields are set literally in the template
            # NOTE(review): placeholder repository URL; replace with yours
            type: git
            url: https://gitlab.example.com/myorg/my-app-config.git
            username: "{{ .username }}"
            password: "{{ .password }}"
      data:
      - secretKey: username
        remoteRef:
          key: argocd/gitlab-repo
          property: username
      - secretKey: password
        remoteRef:
          key: argocd/gitlab-repo
          property: password
  
  # Network security: restrict traffic to and from the Argo CD pods.
  network_security: |
    # NetworkPolicy for every pod labelled app.kubernetes.io/part-of: argocd
    apiVersion: networking.k8s.io/v1
    kind: NetworkPolicy
    metadata:
      name: argocd-network-policy
      namespace: argocd
    spec:
      podSelector:
        matchLabels:
          app.kubernetes.io/part-of: argocd
      policyTypes:
      - Ingress
      - Egress
      ingress:
      # UI/API traffic from the ingress controller namespace.
      # kubernetes.io/metadata.name is set automatically on every namespace;
      # a plain `name` label only exists if someone added it manually.
      - from:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: ingress-nginx
        ports:
        - protocol: TCP
          port: 8080
      # Traffic between Argo CD components
      - from:
        - podSelector:
            matchLabels:
              app.kubernetes.io/part-of: argocd
      egress:
      # DNS: without this rule no Git host or cluster name can be resolved
      # once Egress is restricted
      - ports:
        - protocol: UDP
          port: 53
        - protocol: TCP
          port: 53
      # Git / Helm repositories and Kubernetes API servers
      - ports:
        - protocol: TCP
          port: 443   # HTTPS
        - protocol: TCP
          port: 22    # SSH
        # NOTE(review): many clusters serve the API on 6443 behind the
        # kubernetes.default Service; drop this if yours does not
        - protocol: TCP
          port: 6443
      # Traffic between Argo CD components
      - to:
        - podSelector:
            matchLabels:
              app.kubernetes.io/part-of: argocd

应用管理和部署

yaml
# Application definitions for plain-manifest, Helm and Kustomize sources.
application_management:
  # Basic Application: deploys the guestbook path of the
  # argocd-example-apps repository into the guestbook namespace of the local
  # cluster, with automated sync (prune + selfHeal), namespace creation and
  # up to 5 retries using exponential backoff (5s, factor 2, max 3m).
  basic_application: |
    apiVersion: argoproj.io/v1alpha1
    kind: Application
    metadata:
      name: guestbook
      namespace: argocd
      finalizers:
        - resources-finalizer.argocd.argoproj.io
    spec:
      project: default
      source:
        repoURL: https://github.com/argoproj/argocd-example-apps.git
        targetRevision: HEAD
        path: guestbook
      destination:
        server: https://kubernetes.default.svc
        namespace: guestbook
      syncPolicy:
        automated:
          prune: true
          selfHeal: true
        syncOptions:
        - CreateNamespace=true
        - PrunePropagationPolicy=foreground
        - PruneLast=true
        retry:
          limit: 5
          backoff:
            duration: 5s
            factor: 2
            maxDuration: 3m
  
  # Helm Application: installs the bitnami/nginx chart with value overrides
  # supplied three ways (value files, parameters, inline values).
  helm_application: |
    apiVersion: argoproj.io/v1alpha1
    kind: Application
    metadata:
      name: nginx-helm
      namespace: argocd
    spec:
      project: default
      source:
        repoURL: https://charts.bitnami.com/bitnami
        chart: nginx
        targetRevision: 13.2.23
        helm:
          # Value files are resolved inside the chart itself. The chart is
          # not expected to ship values-production.yaml, and a missing file
          # aborts manifest generation unless it is explicitly tolerated.
          valueFiles:
          - values-production.yaml
          ignoreMissingValueFiles: true

          # Individual parameters (highest precedence)
          parameters:
          - name: replicaCount
            value: "3"
          - name: service.type
            value: ClusterIP
          - name: ingress.enabled
            value: "true"

          # Inline values
          values: |
            image:
              tag: "1.21.6"
            resources:
              limits:
                cpu: 500m
                memory: 512Mi
              requests:
                cpu: 100m
                memory: 128Mi
            nodeSelector:
              node-type: application
      destination:
        server: https://kubernetes.default.svc
        namespace: nginx-production
      syncPolicy:
        automated:
          prune: true
          selfHeal: true
        syncOptions:
        - CreateNamespace=true
        - ServerSideApply=true
  
  # Kustomize Application. spec.source.kustomize accepts only a small set of
  # overrides (labels, annotations, images, name prefix/suffix, ...). Build
  # flags are a global argocd-cm setting, and patches belong to the overlay's
  # own kustomization.yaml, so the snippet is split accordingly.
  kustomize_application: |
    apiVersion: argoproj.io/v1alpha1
    kind: Application
    metadata:
      name: my-app-kustomize
      namespace: argocd
    spec:
      project: default
      source:
        repoURL: https://github.com/myorg/my-app-config
        targetRevision: HEAD
        path: overlays/production
        kustomize:
          # Common labels
          commonLabels:
            app.kubernetes.io/managed-by: argocd
            environment: production

          # Common annotations
          commonAnnotations:
            argocd.argoproj.io/sync-wave: "1"

          # Image overrides are plain strings: <name>:<tag> or <old>=<new>:<tag>
          images:
          - my-app:v1.2.3
      destination:
        server: https://kubernetes.default.svc
        namespace: my-app-prod

    ---
    # Kustomize build flags are configured once for the whole instance
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: argocd-cm
      namespace: argocd
      labels:
        app.kubernetes.io/part-of: argocd
    data:
      kustomize.buildOptions: --enable-helm

    ---
    # overlays/production/kustomization.yaml in the Git repository
    # (a file in the repo, not a cluster resource)
    # NOTE(review): the `resources` entry is a placeholder for your base
    apiVersion: kustomize.config.k8s.io/v1beta1
    kind: Kustomization
    resources:
    - ../../base
    patches:
    # Strategic-merge patch file
    - path: production-patches.yaml
    # JSON 6902 patch
    - target:
        group: apps
        version: v1
        kind: Deployment
        name: my-app
      patch: |-
        - op: replace
          path: /spec/replicas
          value: 5
yaml
# Project-level (multi-tenancy) configuration.
project_management:
  # AppProject for the frontend team: limits source repositories,
  # destinations and resource kinds, defines project roles and sync windows.
  app_project: |
    apiVersion: argoproj.io/v1alpha1
    kind: AppProject
    metadata:
      name: team-frontend
      namespace: argocd
    spec:
      description: "Frontend team applications"

      # Allowed source repositories
      sourceRepos:
      - 'https://github.com/myorg/frontend-*'
      - 'https://charts.bitnami.com/bitnami'
      - 'helm-chart-repo'

      # Allowed destination clusters and namespaces
      destinations:
      - namespace: 'frontend-*'
        server: https://kubernetes.default.svc
      - namespace: 'staging-*'
        server: https://staging-cluster.example.com

      # Cluster-scoped resources that may be deployed
      clusterResourceWhitelist:
      - group: ""
        kind: Namespace
      - group: rbac.authorization.k8s.io
        kind: ClusterRole
      - group: rbac.authorization.k8s.io
        kind: ClusterRoleBinding

      # Namespaced resources that may be deployed
      namespaceResourceWhitelist:
      - group: apps
        kind: Deployment
      - group: apps
        kind: ReplicaSet
      - group: ""
        kind: Service
      - group: ""
        kind: ConfigMap
      - group: ""
        kind: Secret
      - group: networking.k8s.io
        kind: Ingress

      # Namespaced resources that may never be deployed
      namespaceResourceBlacklist:
      - group: ""
        kind: ResourceQuota
      - group: ""
        kind: LimitRange

      # Project roles. Every policy object must stay inside this project
      # (team-frontend/...); anything wider is rejected on validation.
      roles:
      - name: developer
        description: "Developer access to frontend applications"
        policies:
        - p, proj:team-frontend:developer, applications, get, team-frontend/*, allow
        - p, proj:team-frontend:developer, applications, sync, team-frontend/*, allow
        - p, proj:team-frontend:developer, applications, action/*, team-frontend/*, allow
        - p, proj:team-frontend:developer, applications, update, team-frontend/*, allow
        - p, proj:team-frontend:developer, logs, get, team-frontend/*, allow
        - p, proj:team-frontend:developer, exec, create, team-frontend/*, allow
        groups:
        - myorg:frontend-developers

      - name: lead
        description: "Team lead access"
        policies:
        - p, proj:team-frontend:lead, applications, *, team-frontend/*, allow
        # Project-scoped repositories only
        - p, proj:team-frontend:lead, repositories, *, team-frontend/*, allow
        groups:
        - myorg:frontend-leads

      # Sync windows. `schedule` marks when a window STARTS and `duration`
      # how long it lasts, so a single start time is used; an hour range such
      # as 9-17 would open a new 8h window every hour. `applications` matches
      # application names, which are already scoped to this project.
      syncWindows:
      # Allow syncs Mon-Fri 09:00-17:00
      - kind: allow
        schedule: "0 9 * * MON-FRI"
        duration: 8h
        applications:
        - '*'
        manualSync: true
      # Deny syncs of production apps 00:00-06:00, manual syncs included
      - kind: deny
        schedule: "0 0 * * *"
        duration: 6h
        applications:
        - 'production-*'
        manualSync: false
  
  # Instance-wide settings: resource actions and health checks in argocd-cm,
  # webhook shared secrets in argocd-secret.
  global_project_settings: |
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: argocd-cm
      namespace: argocd
      labels:
        app.kubernetes.io/part-of: argocd
    data:
      # Label used to track which Application owns a resource
      application.instanceLabelKey: argocd.argoproj.io/instance

      # Custom resource action: "restart" for Deployments
      resource.customizations.actions.apps_Deployment: |
        discovery.lua: |
          actions = {}
          actions["restart"] = {["disabled"] = false}
          return actions
        definitions:
        - name: restart
          action.lua: |
            local os = require("os")
            if obj.spec.template.metadata == nil then
              obj.spec.template.metadata = {}
            end
            if obj.spec.template.metadata.annotations == nil then
              obj.spec.template.metadata.annotations = {}
            end
            -- %X (time of day) mirrors the upstream built-in restart action
            obj.spec.template.metadata.annotations["kubectl.kubernetes.io/restartedAt"] = os.date("!%Y-%m-%dT%XZ")
            return obj

      # Custom health check: always report Ingress resources as Healthy
      resource.customizations.health.networking.k8s.io_Ingress: |
        hs = {}
        hs.status = "Healthy"
        return hs

      # NOTE(review): the former application.monitoring.* keys were dropped;
      # argocd-cm has no such settings. Metrics are scraped through the
      # ServiceMonitor objects instead.

    ---
    # Webhook shared secrets are read from argocd-secret, not argocd-cm.
    # Merge these keys into the existing Secret (kubectl apply / patch);
    # do not replace it, it also holds the server signing key.
    apiVersion: v1
    kind: Secret
    metadata:
      name: argocd-secret
      namespace: argocd
    type: Opaque
    stringData:
      webhook.github.secret: github-webhook-secret
      webhook.gitlab.secret: gitlab-webhook-secret
yaml
# Sync behaviour: policies, waves, hooks and diff customisation.
sync_deployment_strategies:
  # Sync policy fragment (the syncPolicy section of an Application spec):
  # automated sync flags, sync options, metadata applied to the managed
  # namespace, and the retry/backoff settings.
  sync_policies: |
    # 自动同步配置
    syncPolicy:
      automated:
        prune: true      # 删除不在Git中的资源
        selfHeal: true   # 自动修复配置漂移
        allowEmpty: false # 不允许空的同步
      
      syncOptions:
      - CreateNamespace=true     # 自动创建命名空间
      - PrunePropagationPolicy=foreground  # 删除传播策略
      - PruneLast=true          # 最后删除资源
      - RespectIgnoreDifferences=true  # 尊重忽略差异
      - ServerSideApply=true    # 使用服务端应用
      - ApplyOutOfSyncOnly=true # 只应用不同步的资源
      
      managedNamespaceMetadata:
        labels:
          managed-by: argocd
          environment: production
        annotations:
          argocd.argoproj.io/managed: "true"
      
      retry:
        limit: 5
        backoff:
          duration: 5s
          factor: 2
          maxDuration: 3m
  
  # Sync waves: metadata-only manifest stubs showing the
  # argocd.argoproj.io/sync-wave annotation on five resource kinds, numbered
  # from "-1" (CRD) through "3" (Ingress).
  sync_waves: |
    # CRDs优先部署 (wave -1)
    apiVersion: apiextensions.k8s.io/v1
    kind: CustomResourceDefinition
    metadata:
      name: mycrds.example.com
      annotations:
        argocd.argoproj.io/sync-wave: "-1"
    
    ---
    # 命名空间和RBAC (wave 0)
    apiVersion: v1
    kind: Namespace
    metadata:
      name: my-app
      annotations:
        argocd.argoproj.io/sync-wave: "0"
    
    ---
    # ConfigMap和Secret (wave 1)
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: my-app-config
      annotations:
        argocd.argoproj.io/sync-wave: "1"
    
    ---
    # 应用部署 (wave 2)
    apiVersion: apps/v1
    kind: Deployment
    metadata:
      name: my-app
      annotations:
        argocd.argoproj.io/sync-wave: "2"
    
    ---
    # 服务暴露 (wave 3)
    apiVersion: networking.k8s.io/v1
    kind: Ingress
    metadata:
      name: my-app-ingress
      annotations:
        argocd.argoproj.io/sync-wave: "3"
  
  # Resource hooks: a PreSync migration Job, a PostSync smoke-test Job and a
  # Sync-phase notification Pod.
  resource_hooks: |
    # PreSync hook - runs before the sync
    apiVersion: batch/v1
    kind: Job
    metadata:
      name: database-migration
      annotations:
        argocd.argoproj.io/hook: PreSync
        argocd.argoproj.io/hook-delete-policy: BeforeHookCreation
    spec:
      # Fail the hook (and the sync) quickly instead of retrying 6 times
      backoffLimit: 1
      template:
        spec:
          containers:
          - name: migrate
            image: migrate/migrate:latest
            command: ["migrate", "-path", "/migrations", "-database", "postgres://...", "up"]
          restartPolicy: Never

    ---
    # PostSync hook - runs after a successful sync
    apiVersion: batch/v1
    kind: Job
    metadata:
      name: smoke-test
      annotations:
        argocd.argoproj.io/hook: PostSync
        argocd.argoproj.io/hook-delete-policy: HookSucceeded
    spec:
      backoffLimit: 1
      template:
        spec:
          containers:
          - name: test
            image: my-app:latest
            command: ["./smoke-test.sh"]
          restartPolicy: Never

    ---
    # Sync hook - runs during the sync
    apiVersion: v1
    kind: Pod
    metadata:
      name: sync-notification
      annotations:
        argocd.argoproj.io/hook: Sync
        argocd.argoproj.io/hook-delete-policy: HookSucceeded
    spec:
      containers:
      - name: notify
        image: curlimages/curl:latest
        env:
        # The script below reads this variable, so it must be injected.
        # NOTE(review): Secret name/key are placeholders; adjust to yours.
        - name: SLACK_WEBHOOK_URL
          valueFrom:
            secretKeyRef:
              name: slack-webhook
              key: url
        command:
        - sh
        - -c
        - |
          curl -X POST -H 'Content-type: application/json' \
            --data '{"text":"Deployment started for my-app"}' \
            "$SLACK_WEBHOOK_URL"
      restartPolicy: Never
  
  # Ignoring differences. Two independent mechanisms: a per-Application
  # spec.ignoreDifferences list, and instance-wide keys in argocd-cm. They
  # are shown as separate documents because the ConfigMap keys are not part
  # of the Application spec.
  ignore_differences: |
    # Application level (fragment of an Application manifest)
    spec:
      ignoreDifferences:
      - group: apps
        kind: Deployment
        jsonPointers:
        - /spec/replicas  # replica count is owned by the HPA
        - /spec/template/spec/containers/0/image  # image is updated out of band

      - group: ""
        kind: Service
        jqPathExpressions:
        - .spec.clusterIP  # assigned by the cluster
        - .metadata.resourceVersion

    ---
    # Instance level (argocd-cm)
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: argocd-cm
      namespace: argocd
      labels:
        app.kubernetes.io/part-of: argocd
    data:
      resource.compareoptions: |
        ignoreAggregatedRoles: true
        ignoreResourceStatusField: all

      resource.customizations.ignoreDifferences.apps_Deployment: |
        jsonPointers:
        - /spec/replicas
        - /spec/template/spec/containers/0/resources
        jqPathExpressions:
        - .spec.template.spec.containers[].env[]?.valueFrom.fieldRef.apiVersion

🔧 高级特性和集成

渐进式交付集成

yaml
# Progressive delivery with Argo Rollouts.
progressive_delivery:
  # Installing the Argo Rollouts controller and its dashboard.
  rollouts_installation: |
    # Install Argo Rollouts
    kubectl create namespace argo-rollouts
    kubectl apply -n argo-rollouts -f https://github.com/argoproj/argo-rollouts/releases/latest/download/install.yaml

    # Install the Rollouts dashboard into the same namespace and from the
    # same release as the controller; without -n it would land in whatever
    # namespace the current kubectl context points at.
    kubectl apply -n argo-rollouts -f https://github.com/argoproj/argo-rollouts/releases/latest/download/dashboard-install.yaml
  
  # Canary Rollout: 10 replicas, NGINX traffic routing between the
  # my-app-canary and my-app-stable Services, weight steps of
  # 10/25/50/75 percent separated by timed pauses, and analysis using the
  # success-rate template.
  canary_rollout: |
    apiVersion: argoproj.io/v1alpha1
    kind: Rollout
    metadata:
      name: my-app-rollout
      namespace: my-app
    spec:
      replicas: 10
      strategy:
        canary:
          # Canary服务配置
          canaryService: my-app-canary
          stableService: my-app-stable
          
          # Ingress流量分割
          trafficRouting:
            nginx:
              stableIngress: my-app-stable
              additionalIngressAnnotations:
                canary-by-header: X-Canary
                canary-by-header-value: "true"
          
          # Canary步骤
          steps:
          - setWeight: 10    # 10%流量到Canary
          - pause:           # 暂停等待
              duration: 60s
          - setWeight: 25    # 增加到25%
          - pause:
              duration: 300s
          - setWeight: 50    # 增加到50%
          - pause:
              duration: 600s
          - setWeight: 75    # 增加到75%
          - pause:
              duration: 300s
          
          # 分析配置
          analysis:
            templates:
            - templateName: success-rate
            args:
            - name: service-name
              value: my-app-canary
          
          # 自动提升条件
          maxSurge: "25%"
          maxUnavailable: "25%"
      
      selector:
        matchLabels:
          app: my-app
      template:
        metadata:
          labels:
            app: my-app
        spec:
          containers:
          - name: my-app
            image: my-app:v2.0
            ports:
            - containerPort: 8080
            resources:
              requests:
                cpu: 100m
                memory: 128Mi
              limits:
                cpu: 500m
                memory: 512Mi
  
  # Blue-green Rollout: 5 replicas, my-app-active / my-app-preview Services,
  # automatic promotion disabled, a 30-second scale-down delay, and the
  # success-rate analysis template run before and after promotion.
  bluegreen_rollout: |
    apiVersion: argoproj.io/v1alpha1
    kind: Rollout
    metadata:
      name: my-app-bluegreen
      namespace: my-app
    spec:
      replicas: 5
      strategy:
        blueGreen:
          # 服务配置
          activeService: my-app-active
          previewService: my-app-preview
          
          # 自动提升配置
          autoPromotionEnabled: false
          scaleDownDelaySeconds: 30
          
          # 提升前分析
          prePromotionAnalysis:
            templates:
            - templateName: success-rate
            args:
            - name: service-name
              value: my-app-preview
          
          # 提升后分析
          postPromotionAnalysis:
            templates:
            - templateName: success-rate
            args:
            - name: service-name
              value: my-app-active
      
      selector:
        matchLabels:
          app: my-app
      template:
        metadata:
          labels:
            app: my-app
        spec:
          containers:
          - name: my-app
            image: my-app:v2.0
  
  # Analysis templates used by the Rollouts: a Prometheus-based success-rate
  # and latency check, and a web (HTTP/JSON) check.
  analysis_templates: |
    # Success-rate analysis template
    apiVersion: argoproj.io/v1alpha1
    kind: AnalysisTemplate
    metadata:
      name: success-rate
      namespace: my-app
    spec:
      args:
      - name: service-name
      metrics:
      - name: success-rate
        interval: 30s
        count: 10
        successCondition: result[0] >= 0.95
        failureLimit: 3
        provider:
          prometheus:
            address: http://prometheus.monitoring:9090
            query: |
              sum(irate(
                http_requests_total{job="{{args.service-name}}",code=~"2.."}[2m]
              )) /
              sum(irate(
                http_requests_total{job="{{args.service-name}}"}[2m]
              ))

      - name: latency
        interval: 30s
        count: 10
        successCondition: result[0] <= 0.5  # 500ms
        provider:
          prometheus:
            address: http://prometheus.monitoring:9090
            query: |
              histogram_quantile(0.95,
                sum(rate(
                  http_request_duration_seconds_bucket{job="{{args.service-name}}"}[2m]
                )) by (le)
              )

    ---
    # Web analysis template
    apiVersion: argoproj.io/v1alpha1
    kind: AnalysisTemplate
    metadata:
      name: web-analysis
      namespace: my-app
    spec:
      # Every {{args.*}} placeholder used below has to be declared here,
      # otherwise the placeholder cannot be resolved
      args:
      - name: service-name
      metrics:
      - name: lighthouse-score
        count: 1
        provider:
          web:
            url: https://{{args.service-name}}.example.com
            headers:
              - key: X-Test-User
                value: canary
            timeoutSeconds: 30
            jsonPath: "{$.categories.performance.score}"
        successCondition: "result >= 0.8"  # score of 80 or better
yaml
# Monitoring, alerting, dashboards and notifications.
monitoring_integration:
  # Prometheus Operator ServiceMonitors for the application controller and
  # the API server metrics Services.
  prometheus_config: |
    # Application controller metrics (Service argocd-metrics)
    apiVersion: monitoring.coreos.com/v1
    kind: ServiceMonitor
    metadata:
      name: argocd-metrics
      namespace: argocd
    spec:
      selector:
        matchLabels:
          app.kubernetes.io/name: argocd-metrics
      endpoints:
      # A single endpoint: listing the same port/path twice scrapes every
      # target twice. Filtering on __name__ must use metricRelabelings,
      # because the metric name only exists after the scrape; `relabelings`
      # run before it and see no __name__ label.
      - port: metrics
        path: /metrics
        interval: 30s
        metricRelabelings:
        - sourceLabels: [__name__]
          regex: 'argocd_.*'
          action: keep

    ---
    # API server metrics (Service argocd-server-metrics)
    apiVersion: monitoring.coreos.com/v1
    kind: ServiceMonitor
    metadata:
      name: argocd-server-metrics
      namespace: argocd
    spec:
      selector:
        matchLabels:
          # Select the metrics Service specifically; the component label is
          # also carried by the main argocd-server Service, which exposes no
          # port named "metrics"
          app.kubernetes.io/name: argocd-server-metrics
      endpoints:
      - port: metrics
        path: /metrics
        interval: 30s
  
  # Prometheus alerting rules. Per-application state is exposed by the
  # argocd_app_info gauge (labels include name, sync_status, health_status);
  # argocd_app_sync_total is a counter of sync operations and has no
  # sync_status label, so it cannot be used to test the current state.
  alerting_rules: |
    groups:
    - name: argocd
      rules:
      - alert: ArgoAppHealthDegraded
        expr: argocd_app_info{health_status!="Healthy"} == 1
        for: 15m
        labels:
          severity: warning
        annotations:
          summary: "ArgoCD App Health Degraded"
          description: "ArgoCD App {{$labels.name}} health is {{$labels.health_status}}"

      - alert: ArgoAppSyncStatusUnknown
        expr: argocd_app_info{sync_status="Unknown"} == 1
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "ArgoCD App Sync Status Unknown"
          description: "ArgoCD App {{$labels.name}} sync status is {{$labels.sync_status}}"

      - alert: ArgoAppNotSynced
        expr: argocd_app_info{sync_status!="Synced"} == 1
        for: 30m
        labels:
          severity: critical
        annotations:
          summary: "ArgoCD App Not Synced"
          description: "ArgoCD App {{$labels.name}} has not been synced for 30m"

      - alert: ArgocdComponentDown
        expr: up{job="argocd-metrics"} == 0
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "ArgoCD Component Down"
          description: "ArgoCD component {{$labels.instance}} is down"
  
  # Grafana dashboard (JSON model). Status panels aggregate the
  # argocd_app_info gauge, which carries the sync_status / health_status
  # labels; the activity panel uses the argocd_app_sync_total counter.
  # "timeseries" replaces the deprecated "graph" panel type.
  grafana_dashboard: |
    {
      "dashboard": {
        "title": "ArgoCD Application Dashboard",
        "panels": [
          {
            "title": "Application Sync Status",
            "type": "stat",
            "targets": [
              {
                "expr": "sum(argocd_app_info) by (sync_status)",
                "legendFormat": "{{sync_status}}"
              }
            ]
          },
          {
            "title": "Application Health Status",
            "type": "piechart",
            "targets": [
              {
                "expr": "sum(argocd_app_info) by (health_status)",
                "legendFormat": "{{health_status}}"
              }
            ]
          },
          {
            "title": "Sync Activity Over Time",
            "type": "timeseries",
            "targets": [
              {
                "expr": "sum(rate(argocd_app_sync_total[5m]))",
                "legendFormat": "Sync Rate"
              }
            ]
          }
        ]
      }
    }
  
  # Argo CD Notifications: Slack service, two message templates, their
  # triggers and a default subscription.
  notification_config: |
    # Slack notification settings
    apiVersion: v1
    kind: ConfigMap
    metadata:
      name: argocd-notifications-cm
      namespace: argocd
    data:
      service.slack: |
        token: $slack-token
        username: ArgoCD
        # Quoted: a plain scalar ending in ':' is read as a mapping key
        icon: ":argo:"

      template.app-deployed: |
        message: |
          {{if eq .serviceType "slack"}}:white_check_mark:{{end}} Application {{.app.metadata.name}} is now running new version.

      template.app-health-degraded: |
        message: |
          {{if eq .serviceType "slack"}}:exclamation:{{end}} Application {{.app.metadata.name}} has degraded health status.

      trigger.on-deployed: |
        - when: app.status.operationState.phase in ['Succeeded'] and app.status.health.status == 'Healthy'
          # Notify once per deployed revision rather than on every reconcile
          oncePer: app.status.sync.revision
          send: [app-deployed]

      trigger.on-health-degraded: |
        - when: app.status.health.status == 'Degraded'
          send: [app-health-degraded]

      subscriptions: |
        - recipients:
          - slack:devops-alerts
          triggers:
          - on-deployed
          - on-health-degraded

📋 ArgoCD 面试重点

核心概念类

  1. ArgoCD的架构组件有哪些?

    • API Server功能和职责
    • Repository Server作用
    • Application Controller机制
    • Redis的使用场景
  2. ArgoCD的同步策略类型?

    • 手动同步vs自动同步
    • Prune和SelfHeal机制
    • 同步波次的应用场景
    • Hook的执行时机
  3. Application和AppProject的关系?

    • AppProject的权限控制
    • 多租户隔离实现
    • 资源白名单和黑名单
    • 同步窗口配置

高级特性类

  1. ArgoCD的多集群管理?

    • 集群注册和配置
    • 跨集群应用部署
    • 集群权限管理
    • 网络连通性要求
  2. 如何实现蓝绿和金丝雀部署?

    • Argo Rollouts集成
    • 流量分割策略
    • 自动分析和回滚
    • 监控指标集成
  3. ArgoCD的安全配置?

    • RBAC权限设计
    • OIDC/LDAP集成
    • TLS证书管理
    • 密钥管理策略

运维实践类

  1. ArgoCD的高可用部署?

    • Redis HA配置
    • 多副本部署
    • 负载均衡配置
    • 备份恢复策略
  2. ArgoCD的监控和告警?

    • Prometheus指标收集
    • 关键告警规则
    • 通知集成配置
    • 性能优化方法
  3. ArgoCD的故障排查?

    • 同步失败诊断
    • 应用健康检查
    • 资源冲突处理
    • 日志分析方法

🔗 相关内容


ArgoCD作为领先的GitOps工具，凭借丰富的功能和强大的可扩展性，为企业提供了完整的云原生应用交付解决方案。深入掌握其配置、管理和最佳实践，是成功实施GitOps策略的关键能力。

正在精进