搭建SkyWalking

注意:9.4.0及以后版本均需要jdk11及以上版本

1. 下载解压安装包
2. 修改配置文件
  • 修改/data/apache-skywalking-apm-bin/config/application.yml

    # vim /data/apache-skywalking-apm-bin/config/application.yml
    # 主要修改以下内容
    storage:
      # 设置存储模式为es
      selector: ${SW_STORAGE:elasticsearch}
      elasticsearch:
        namespace: ${SW_NAMESPACE:""}
        # es地址
        clusterNodes: ${SW_STORAGE_ES_CLUSTER_NODES:172.21.126.157:9201}
        protocol: ${SW_STORAGE_ES_HTTP_PROTOCOL:"http"}
        connectTimeout: ${SW_STORAGE_ES_CONNECT_TIMEOUT:3000}
        socketTimeout: ${SW_STORAGE_ES_SOCKET_TIMEOUT:30000}
        responseTimeout: ${SW_STORAGE_ES_RESPONSE_TIMEOUT:15000}
        numHttpClientThread: ${SW_STORAGE_ES_NUM_HTTP_CLIENT_THREAD:0}
        # es用户
        user: ${SW_ES_USER:"elastic"}
        # es密码
        password: ${SW_ES_PASSWORD:"123456"}
        trustStorePath: ${SW_STORAGE_ES_SSL_JKS_PATH:""}
        trustStorePass: ${SW_STORAGE_ES_SSL_JKS_PASS:""}
        secretsManagementFile: ${SW_ES_SECRETS_MANAGEMENT_FILE:""}
    
        # 索引步长,默认1,每天一个索引
        dayStep: ${SW_STORAGE_DAY_STEP:1}
        # 索引分片数
        indexShardsNumber: ${SW_STORAGE_ES_INDEX_SHARDS_NUMBER:1}
        # 索引副本数这里设置0副本,因为我们是单节点es
        indexReplicasNumber: ${SW_STORAGE_ES_INDEX_REPLICAS_NUMBER:0}
    
  • 修改web展示页面配置文件

    /data/apache-skywalking-apm-bin/webapp/application.yml

    # vim /data/apache-skywalking-apm-bin/webapp/application.yml
    # 修改下端口号避免冲突,默认端口8080
    serverPort: ${SW_SERVER_PORT:-18080}
    
3. 启动服务
# 进入启动目录
cd /data/apache-skywalking-apm-bin/bin
# 一起启动oap服务和web服务
./startup.sh

# 单独启动oap服务
./oapService.sh

# 单独启动web服务
./webappService.sh
4. 访问UI界面
  • web页面访问

    # 访问地址
    172.21.126.172:18080
    
    # 说明:部署agent并在应用中集成SkyWalking Agent后,应用开始上报数据,等待1~2分钟即可在页面上看到数据
    
  • web页面添加认证

    背景:SkyWalking从8.0.0版本开始弃用自身的安全认证功能;因此我们只能通过nginx来实现安全认证

    # 安装htpasswd命令工具
    yum install -y httpd-tools
    
    # 创建加密文件
    # 系统会提示输入密码,之后会创建一个用户为admin的认证文件并输出到/data/nginx/htpasswd里
    htpasswd -c /data/nginx/htpasswd admin
    
    # 修改nginx配置nginx.conf;新增一个8081端口
        server {
          listen       8081;
            location / {
              auth_basic "Restricted Access";
              auth_basic_user_file /data/nginx/htpasswd;
              proxy_pass http://172.21.126.172:18080;
          }
        }
     # 检查配置文件格式
     nginx -t
     # 重启nginx
     nginx -s reload
    
     # skywalking访问地址(nginx代理之后)
     # ip地址为nginx的地址; 账号admin密码是上面创建认证文件时候输入的密码
     http://172.21.126.26:8081
    
    
5. 配置文件介绍(下面这块不需要看只是配置文件介绍)
  • 集群配置块
    #集群配置
    cluster:
      # 选择哪一种集群模式
      selector: ${SW_CLUSTER:standalone}
      # 单机版
      standalone:
      # zk 注册集群配置,zk 版本保证在3.5以上,oap-libs中也对3.4进行了支持
      zookeeper:
        nameSpace: ${SW_NAMESPACE:""}
        hostPort: ${SW_CLUSTER_ZK_HOST_PORT:localhost:2181}
        ## Retry Policy 重试策略
        # initial amount of time to wait between retries
        # 初始化等待时间
        baseSleepTimeMs: ${SW_CLUSTER_ZK_SLEEP_TIME:1000} 
        # max number of times to retry 最大重试次数
        maxRetries: ${SW_CLUSTER_ZK_MAX_RETRIES:3} 
        # Enable ACL 开启访问控制
        enableACL: ${SW_ZK_ENABLE_ACL:false} # disable ACL in default
        schema: ${SW_ZK_SCHEMA:digest} # only support digest schema
        expression: ${SW_ZK_EXPRESSION:skywalking:skywalking}
      # k8s部署配置
      kubernetes:
        namespace: ${SW_CLUSTER_K8S_NAMESPACE:default}
        labelSelector: ${SW_CLUSTER_K8S_LABEL:app=collector,release=skywalking}
        uidEnvName: ${SW_CLUSTER_K8S_UID:SKYWALKING_COLLECTOR_UID}
      # consul 注册集群配置
      consul:
        serviceName: ${SW_SERVICE_NAME:"SkyWalking_OAP_Cluster"}
        # Consul cluster nodes, example: 10.0.0.1:8500,10.0.0.2:8500,10.0.0.3:8500
        hostPort: ${SW_CLUSTER_CONSUL_HOST_PORT:localhost:8500}
        aclToken: ${SW_CLUSTER_CONSUL_ACLTOKEN:""}
      # etcd 注册集群配置
      etcd:
        serviceName: ${SW_SERVICE_NAME:"SkyWalking_OAP_Cluster"}
        # etcd cluster nodes, example: 10.0.0.1:2379,10.0.0.2:2379,10.0.0.3:2379
        hostPort: ${SW_CLUSTER_ETCD_HOST_PORT:localhost:2379}
      # nacos 注册集群配置
      nacos:
        serviceName: ${SW_SERVICE_NAME:"SkyWalking_OAP_Cluster"}
        hostPort: ${SW_CLUSTER_NACOS_HOST_PORT:localhost:8848}
        # Nacos Configuration namespace
        namespace: ${SW_CLUSTER_NACOS_NAMESPACE:"public"}
        # Nacos auth username
        username: ${SW_CLUSTER_NACOS_USERNAME:""}
        password: ${SW_CLUSTER_NACOS_PASSWORD:""}
        # Nacos auth accessKey
        accessKey: ${SW_CLUSTER_NACOS_ACCESSKEY:""}
        secretKey: ${SW_CLUSTER_NACOS_SECRETKEY:""}
    
  • core核心配置
    core:
    #配置选择
      selector: ${SW_CORE:default}
      default:
        # Mixed: Receive agent data, Level 1 aggregate, Level 2 aggregate
        # Receiver: Receive agent data, Level 1 aggregate
        # Aggregator: Level 2 aggregate
        # 本服务的角色,包含3种类型,当数据量较大时,需要分角色部署,增加处理能力。
        # Mixed: 混合模式,接收数据,1级汇总数据,2级汇总数据
        # Receiver: 接收角色,数据接收,1级汇总数据,持久化,告警
        # Aggregator: 聚合角色,2级汇总数据,持久化,告警
    
        role: ${SW_CORE_ROLE:Mixed} # Mixed/Receiver/Aggregator
    #接收ip
        restHost: ${SW_CORE_REST_HOST:0.0.0.0}
    #接收端口
        restPort: ${SW_CORE_REST_PORT:12800}
    #服务路径
        restContextPath: ${SW_CORE_REST_CONTEXT_PATH:/}
    #最大线程数
        restMaxThreads: ${SW_CORE_REST_MAX_THREADS:200}
    #线程空闲时间,超过关闭
        restIdleTimeOut: ${SW_CORE_REST_IDLE_TIMEOUT:30000}
    #接收队列大小
        restAcceptQueueSize: ${SW_CORE_REST_QUEUE_SIZE:0}
    #http最大请求标头大小
        httpMaxRequestHeaderSize: ${SW_CORE_HTTP_MAX_REQUEST_HEADER_SIZE:8192}
    #grpc服务接收ip
        gRPCHost: ${SW_CORE_GRPC_HOST:0.0.0.0}
    #grpc服务接收端口
        gRPCPort: ${SW_CORE_GRPC_PORT:11800}
    #每个连接的最大并发调用数
        maxConcurrentCallsPerConnection: ${SW_CORE_GRPC_MAX_CONCURRENT_CALL:0}
    #最大消息体
        maxMessageSize: ${SW_CORE_GRPC_MAX_MESSAGE_SIZE:0}
    #gRpc线程队列大小
        gRPCThreadPoolQueueSize: ${SW_CORE_GRPC_POOL_QUEUE_SIZE:-1}
    #gRpc线程池大小
        gRPCThreadPoolSize: ${SW_CORE_GRPC_THREAD_POOL_SIZE:-1}
    #gRpc安全协议是否开启
        gRPCSslEnabled: ${SW_CORE_GRPC_SSL_ENABLED:false}
        gRPCSslKeyPath: ${SW_CORE_GRPC_SSL_KEY_PATH:""}
        gRPCSslCertChainPath: ${SW_CORE_GRPC_SSL_CERT_CHAIN_PATH:""}
        gRPCSslTrustedCAPath: ${SW_CORE_GRPC_SSL_TRUSTED_CA_PATH:""}
    #降采样粒度(按小时、按天)
        downsampling:
          - Hour
          - Day
    #采样数据留存时间,超过自动删除
    # Set a timeout on metrics data. After the timeout has expired, the metrics data will automatically be deleted.
    #是否开启数据保持处理器,关闭则不删除采样数据
        enableDataKeeperExecutor: ${SW_CORE_ENABLE_DATA_KEEPER_EXECUTOR:true} # Turn it off then automatically metrics data delete will be close.
    #数据保持处理器执行频率,单位分钟,默认5分钟一次
        dataKeeperExecutePeriod: ${SW_CORE_DATA_KEEPER_EXECUTE_PERIOD:5} # How often the data keeper executor runs periodically, unit is minute
    #数据保留时间,单位天
        recordDataTTL: ${SW_CORE_RECORD_DATA_TTL:3} # Unit is day
    #分析数据保留时间,单位天
        metricsDataTTL: ${SW_CORE_METRICS_DATA_TTL:7} # Unit is day
    #L1聚合刷新到L2聚合的周期。 单位是毫秒。
        # The period of L1 aggregation flush to L2 aggregation. Unit is ms.
        l1FlushPeriod: ${SW_CORE_L1_AGGREGATION_FLUSH_PERIOD:500}
    #会话时间的阈值,单位是毫秒,默认值为 70 秒。
        # The threshold of session time. Unit is ms. Default value is 70s.
        storageSessionTimeout: ${SW_CORE_STORAGE_SESSION_TIMEOUT:70000}
    #做数据持久化的时期,单位为秒,默认值为25s
        # The period of doing data persistence. Unit is second.Default value is 25s
        persistentPeriod: ${SW_CORE_PERSISTENT_PERIOD:25}
    #将metrics数据缓存1分钟以减少数据库查询
        # Cache metrics data for 1 minute to reduce database queries, and if the OAP cluster changes within that minute,
    #如果OAP集群在那一分钟内发生变化,那么在那一分钟内,这些度量可能不准确
        # the metrics may not be accurate within that minute.
        enableDatabaseSession: ${SW_CORE_ENABLE_DATABASE_SESSION:true}
    #topN记录的上报周期,单位为分钟
        topNReportPeriod: ${SW_CORE_TOPN_REPORT_PERIOD:10} # top_n record worker report cycle, unit is minute
    #额外的模型字段,用于在可视化工具中查看es数据,开启会增加一定的性能损耗。
        activeExtraModelColumns: ${SW_CORE_ACTIVE_EXTRA_MODEL_COLUMNS:false}
    #服务名最大长度,服务名+实例名的最大长度必须小于200
        # The max length of service + instance names should be less than 200
        serviceNameMaxLength: ${SW_SERVICE_NAME_MAX_LENGTH:70}
    #实例名最大长度,服务名+实例名的最大长度必须小于200
        instanceNameMaxLength: ${SW_INSTANCE_NAME_MAX_LENGTH:70}
    #端点名最大长度,服务名+端点名(api)的最大长度必须小于240
        # The max length of service + endpoint names should be less than 240
        endpointNameMaxLength: ${SW_ENDPOINT_NAME_MAX_LENGTH:150}
    #定义一组span标记键,这些键可以通过GraphQL进行搜索
        # Define the set of span tag keys, which should be searchable through the GraphQL.
        searchableTracesTags: ${SW_SEARCHABLE_TAG_KEYS:http.method,http.status_code,rpc.status_code,db.type,db.instance,mq.queue,mq.topic,mq.broker}
        # Define the set of log tag keys, which should be searchable through the GraphQL.
        searchableLogsTags: ${SW_SEARCHABLE_LOGS_TAG_KEYS:level}
        # Define the set of alarm tag keys, which should be searchable through the GraphQL.
        searchableAlarmTags: ${SW_SEARCHABLE_ALARM_TAG_KEYS:level}
        # The max size of tags keys for autocomplete select.
        autocompleteTagKeysQueryMaxSize: ${SW_AUTOCOMPLETE_TAG_KEYS_QUERY_MAX_SIZE:100}
        # The max size of tags values for autocomplete select.
        autocompleteTagValuesQueryMaxSize: ${SW_AUTOCOMPLETE_TAG_VALUES_QUERY_MAX_SIZE:100}
        # The number of threads used to prepare metrics data to the storage.
        prepareThreads: ${SW_CORE_PREPARE_THREADS:2}
        # Turn it on then automatically grouping endpoint by the given OpenAPI definitions.
        enableEndpointNameGroupingByOpenapi: ${SW_CORE_ENABLE_ENDPOINT_NAME_GROUPING_BY_OPAENAPI:true}
    
  • storage数据存储配置
    storage:
    #选择数据存储类型
      selector: ${SW_STORAGE:elasticsearch}
      elasticsearch:
        namespace: ${SW_NAMESPACE:""}
        clusterNodes: ${SW_STORAGE_ES_CLUSTER_NODES:10.19.9.85:9200}
        protocol: ${SW_STORAGE_ES_HTTP_PROTOCOL:"http"}
        connectTimeout: ${SW_STORAGE_ES_CONNECT_TIMEOUT:3000}
        socketTimeout: ${SW_STORAGE_ES_SOCKET_TIMEOUT:30000}
        responseTimeout: ${SW_STORAGE_ES_RESPONSE_TIMEOUT:15000}
        numHttpClientThread: ${SW_STORAGE_ES_NUM_HTTP_CLIENT_THREAD:0}
        user: ${SW_ES_USER:""}
        password: ${SW_ES_PASSWORD:""}
        trustStorePath: ${SW_STORAGE_ES_SSL_JKS_PATH:""}
        trustStorePass: ${SW_STORAGE_ES_SSL_JKS_PASS:""}
    #安全管理文件,内容包括用户名/密码,由第三方工具管理
        secretsManagementFile: ${SW_ES_SECRETS_MANAGEMENT_FILE:""} 
    #索引步长,默认1,每天一个索引
        dayStep: ${SW_STORAGE_DAY_STEP:1} 
    #索引分片数
        indexShardsNumber: ${SW_STORAGE_ES_INDEX_SHARDS_NUMBER:1} 
    #索引副本数
        indexReplicasNumber: ${SW_STORAGE_ES_INDEX_REPLICAS_NUMBER:1} 
    #大数据集配置,当查询包含超数据集,下面三种配置可以提高性能
    #大数据存储索引保留天数,小于0则与dayStep一致
        superDatasetDayStep: ${SW_SUPERDATASET_STORAGE_DAY_STEP:-1} 
    #大数据存储分片因子,分片数为indexShardsNumber*superDatasetIndexShardsFactor,此因素也会影响Zipkin和Jaeger的踪迹
        superDatasetIndexShardsFactor: ${SW_STORAGE_ES_SUPER_DATASET_INDEX_SHARDS_FACTOR:5}
    #大数据索引副本数
        superDatasetIndexReplicasNumber: ${SW_STORAGE_ES_SUPER_DATASET_INDEX_REPLICAS_NUMBER:0} 
    #索引模板的顺序
        indexTemplateOrder: ${SW_STORAGE_ES_INDEX_TEMPLATE_ORDER:0} 
    #异步批量写库,默认5000条
        bulkActions: ${SW_STORAGE_ES_BULK_ACTIONS:5000} 
    #无论请求数量如何,每 15 秒刷新一次批量
        flushInterval: ${SW_STORAGE_ES_FLUSH_INTERVAL:15}
    #并发请求数
        concurrentRequests: ${SW_STORAGE_ES_CONCURRENT_REQUESTS:2} 
    #结果最大数据量
        resultWindowMaxSize: ${SW_STORAGE_ES_QUERY_MAX_WINDOW_SIZE:10000}
    #元数据查询最大数据量
        metadataQueryMaxSize: ${SW_STORAGE_ES_QUERY_MAX_SIZE:10000}
    #滚动数据量大小
        scrollingBatchSize: ${SW_STORAGE_ES_SCROLLING_BATCH_SIZE:5000}
    #分段查询最大数据量
        segmentQueryMaxSize: ${SW_STORAGE_ES_QUERY_SEGMENT_SIZE:200}
    #任务查询最大条数
        profileTaskQueryMaxSize: ${SW_STORAGE_ES_QUERY_PROFILE_TASK_SIZE:200}
    #profile 数据查询批量大小
        profileDataQueryBatchSize: ${SW_STORAGE_ES_QUERY_PROFILE_DATA_BATCH_SIZE:100}
    #OAP分析器
        oapAnalyzer: ${SW_STORAGE_ES_OAP_ANALYZER:"{\"analyzer\":{\"oap_analyzer\":{\"type\":\"stop\"}}}"} # the oap analyzer.
    #OAP日志分析器
        oapLogAnalyzer: ${SW_STORAGE_ES_OAP_LOG_ANALYZER:"{\"analyzer\":{\"oap_log_analyzer\":{\"type\":\"standard\"}}}"} # the oap log analyzer. It could be customized by the ES analyzer configuration to support more language log formats, such as Chinese log, Japanese log and etc.
        advanced: ${SW_STORAGE_ES_ADVANCED:""}
      h2:
        driver: ${SW_STORAGE_H2_DRIVER:org.h2.jdbcx.JdbcDataSource}
        url: ${SW_STORAGE_H2_URL:jdbc:h2:mem:skywalking-oap-db;DB_CLOSE_DELAY=-1}
        user: ${SW_STORAGE_H2_USER:sa}
        metadataQueryMaxSize: ${SW_STORAGE_H2_QUERY_MAX_SIZE:5000}
        maxSizeOfBatchSql: ${SW_STORAGE_MAX_SIZE_OF_BATCH_SQL:100}
        asyncBatchPersistentPoolSize: ${SW_STORAGE_ASYNC_BATCH_PERSISTENT_POOL_SIZE:1}
      mysql:
        properties:
          jdbcUrl: ${SW_JDBC_URL:"jdbc:mysql://localhost:3306/swtest?rewriteBatchedStatements=true"}
          dataSource.user: ${SW_DATA_SOURCE_USER:root}
          dataSource.password: ${SW_DATA_SOURCE_PASSWORD:root@1234}
          dataSource.cachePrepStmts: ${SW_DATA_SOURCE_CACHE_PREP_STMTS:true}
          dataSource.prepStmtCacheSize: ${SW_DATA_SOURCE_PREP_STMT_CACHE_SQL_SIZE:250}
          dataSource.prepStmtCacheSqlLimit: ${SW_DATA_SOURCE_PREP_STMT_CACHE_SQL_LIMIT:2048}
          dataSource.useServerPrepStmts: ${SW_DATA_SOURCE_USE_SERVER_PREP_STMTS:true}
        metadataQueryMaxSize: ${SW_STORAGE_MYSQL_QUERY_MAX_SIZE:5000}
        maxSizeOfBatchSql: ${SW_STORAGE_MAX_SIZE_OF_BATCH_SQL:2000}
        asyncBatchPersistentPoolSize: ${SW_STORAGE_ASYNC_BATCH_PERSISTENT_POOL_SIZE:4}
      tidb:
        properties:
          jdbcUrl: ${SW_JDBC_URL:"jdbc:mysql://localhost:4000/tidbswtest?rewriteBatchedStatements=true"}
          dataSource.user: ${SW_DATA_SOURCE_USER:root}
          dataSource.password: ${SW_DATA_SOURCE_PASSWORD:""}
          dataSource.cachePrepStmts: ${SW_DATA_SOURCE_CACHE_PREP_STMTS:true}
          dataSource.prepStmtCacheSize: ${SW_DATA_SOURCE_PREP_STMT_CACHE_SQL_SIZE:250}
          dataSource.prepStmtCacheSqlLimit: ${SW_DATA_SOURCE_PREP_STMT_CACHE_SQL_LIMIT:2048}
          dataSource.useServerPrepStmts: ${SW_DATA_SOURCE_USE_SERVER_PREP_STMTS:true}
          dataSource.useAffectedRows: ${SW_DATA_SOURCE_USE_AFFECTED_ROWS:true}
        metadataQueryMaxSize: ${SW_STORAGE_MYSQL_QUERY_MAX_SIZE:5000}
        maxSizeOfBatchSql: ${SW_STORAGE_MAX_SIZE_OF_BATCH_SQL:2000}
        asyncBatchPersistentPoolSize: ${SW_STORAGE_ASYNC_BATCH_PERSISTENT_POOL_SIZE:4}
      postgresql:
        properties:
          jdbcUrl: ${SW_JDBC_URL:"jdbc:postgresql://localhost:5432/skywalking"}
          dataSource.user: ${SW_DATA_SOURCE_USER:postgres}
          dataSource.password: ${SW_DATA_SOURCE_PASSWORD:123456}
          dataSource.cachePrepStmts: ${SW_DATA_SOURCE_CACHE_PREP_STMTS:true}
          dataSource.prepStmtCacheSize: ${SW_DATA_SOURCE_PREP_STMT_CACHE_SQL_SIZE:250}
          dataSource.prepStmtCacheSqlLimit: ${SW_DATA_SOURCE_PREP_STMT_CACHE_SQL_LIMIT:2048}
          dataSource.useServerPrepStmts: ${SW_DATA_SOURCE_USE_SERVER_PREP_STMTS:true}
        metadataQueryMaxSize: ${SW_STORAGE_MYSQL_QUERY_MAX_SIZE:5000}
        maxSizeOfBatchSql: ${SW_STORAGE_MAX_SIZE_OF_BATCH_SQL:2000}
        asyncBatchPersistentPoolSize: ${SW_STORAGE_ASYNC_BATCH_PERSISTENT_POOL_SIZE:4}
      banyandb:
        host: ${SW_STORAGE_BANYANDB_HOST:127.0.0.1}
        port: ${SW_STORAGE_BANYANDB_PORT:17912}
        maxBulkSize: ${SW_STORAGE_BANYANDB_MAX_BULK_SIZE:5000}
        flushInterval: ${SW_STORAGE_BANYANDB_FLUSH_INTERVAL:15}
        metricsShardsNumber: ${SW_STORAGE_BANYANDB_METRICS_SHARDS_NUMBER:1}
        recordShardsNumber: ${SW_STORAGE_BANYANDB_RECORD_SHARDS_NUMBER:1}
        superDatasetShardsFactor: ${SW_STORAGE_BANYANDB_SUPERDATASET_SHARDS_FACTOR:2}
        concurrentWriteThreads: ${SW_STORAGE_BANYANDB_CONCURRENT_WRITE_THREADS:15}
        profileTaskQueryMaxSize: ${SW_STORAGE_BANYANDB_PROFILE_TASK_QUERY_MAX_SIZE:200} # the max number of fetch task in a request