apache-dolphinscheduler-3.2.0 单机启动及改为mysql存储

一、apache-dolphinscheduler介绍

Apache DolphinScheduler 是一个分布式的开源调度系统,它提供了一种可视化、可扩展、高可用的任务调度和数据处理方案。

二、apache-dolphinscheduler配置

2.1、下载apache-dolphinscheduler

https://dolphinscheduler.apache.org/en-us/download/3.2.0

apache-dolphinscheduler-3.2.0

2.2、H2配置文件

spring:
  jackson:
    time-zone: UTC
    date-format: "yyyy-MM-dd HH:mm:ss"
  banner:
    charset: UTF-8
  cache:
    # default enable cache, you can disable by `type: none`
    type: none
    cache-names:
      - tenant
      - user
      - processDefinition
      - processTaskRelation
      - taskDefinition
    caffeine:
      spec: maximumSize=100,expireAfterWrite=300s,recordStats
  sql:
    init:
      schema-locations: classpath:sql/dolphinscheduler_h2.sql
  datasource:
    driver-class-name: org.h2.Driver
    url: jdbc:h2:mem:dolphinscheduler;MODE=MySQL;DB_CLOSE_DELAY=-1;DATABASE_TO_LOWER=true
    username: sa
    password: ""


  quartz:
    job-store-type: jdbc
    jdbc:
      initialize-schema: never
    properties:
      org.quartz.threadPool.threadPriority: 5
      org.quartz.jobStore.isClustered: true
      org.quartz.jobStore.class: org.springframework.scheduling.quartz.LocalDataSourceJobStore
      org.quartz.scheduler.instanceId: AUTO
      org.quartz.jobStore.tablePrefix: QRTZ_
      org.quartz.jobStore.acquireTriggersWithinLock: true
      org.quartz.scheduler.instanceName: DolphinScheduler
      org.quartz.threadPool.class: org.quartz.simpl.SimpleThreadPool
      org.quartz.jobStore.useProperties: false
      org.quartz.threadPool.makeThreadsDaemons: true
      org.quartz.threadPool.threadCount: 25
      org.quartz.jobStore.misfireThreshold: 60000
      org.quartz.scheduler.makeSchedulerThreadDaemon: true
      org.quartz.jobStore.driverDelegateClass: org.quartz.impl.jdbcjobstore.StdJDBCDelegate
      org.quartz.jobStore.clusterCheckinInterval: 5000
      org.quartz.scheduler.batchTriggerAcquisitionMaxCount: 1
  servlet:
    multipart:
      max-file-size: 1024MB
      max-request-size: 1024MB
  messages:
    basename: i18n/messages
  jpa:
    hibernate:
      ddl-auto: none
  mvc:
    pathmatch:
      matching-strategy: ANT_PATH_MATCHER

registry:
  type: zookeeper
  zookeeper:
    namespace: dolphinscheduler
    connect-string: localhost:2181
    retry-policy:
      base-sleep-time: 60ms
      max-sleep: 300ms
      max-retries: 5
    session-timeout: 30s
    connection-timeout: 9s
    block-until-connected: 600ms
    digest: ~

security:
  authentication:
    # Authentication types (supported types: PASSWORD,LDAP,CASDOOR_SSO)
    type: PASSWORD
    # IF you set type `LDAP`, below config will be effective
    ldap:
      # ldap server config
      urls: ldap://ldap.forumsys.com:389/
      base-dn: dc=example,dc=com
      username: cn=read-only-admin,dc=example,dc=com
      password: password
      user:
        # admin userId when you use LDAP login
        admin: read-only-admin
        identity-attribute: uid
        email-attribute: mail
        # action when ldap user is not exist (supported types: CREATE,DENY)
        not-exist-action: CREATE
      ssl:
        enable: false
        # jks file absolute path && password
        trust-store: "/ldapkeystore.jks"
        trust-store-password: ""
    casdoor:
      user:
        admin: admin
    oauth2:
      enable: false
      provider:
        github:
          authorizationUri: "https://github.com/login/oauth/authorize"
          redirectUri: "http://localhost:12345/dolphinscheduler/redirect/login/oauth2"
          clientId: ""
          clientSecret: ""
          tokenUri: "https://github.com/login/oauth/access_token"
          userInfoUri: "https://api.github.com/user"
          callbackUrl: "http://localhost:5173/login"
          iconUri: ""
          provider: github
        gitee:
          authorizationUri: "https://gitee.com/oauth/authorize"
          redirectUri: "http://127.0.0.1:12345/dolphinscheduler/redirect/login/oauth2"
          clientId: ""
          clientSecret: ""
          tokenUri: "https://gitee.com/oauth/token?grant_type=authorization_code"
          userInfoUri: "https://gitee.com/api/v5/user"
          callbackUrl: "http://127.0.0.1:5173/login"
          iconUri: ""
          provider: gitee

casdoor:
  # Your Casdoor server url
  endpoint: http://localhost:8000
  client-id: ""
  client-secret: ""
  # The certificate may be multi-line, you can use `|-` for ease
  certificate: ""
  # Your organization name added in Casdoor
  organization-name: built-in
  # Your application name added in Casdoor
  application-name: dolphinscheduler
  # Dolphinscheduler login url
  redirect-url: http://localhost:5173/login



master:
  listen-port: 5678
  # master fetch command num
  fetch-command-num: 10
  # master prepare execute thread number to limit handle commands in parallel
  pre-exec-threads: 10
  # master execute thread number to limit process instances in parallel
  exec-threads: 10
  # master dispatch task number per batch
  dispatch-task-number: 3
  # master host selector to select a suitable worker, default value: LowerWeight. Optional values include random, round_robin, lower_weight
  host-selector: lower_weight
  # master heartbeat interval
  heartbeat-interval: 10s
  # master commit task retry times
  task-commit-retry-times: 5
  # master commit task interval
  task-commit-interval: 1s
  state-wheel-interval: 5s
  # master max cpuload avg percentage, only higher than the system cpu load average, master server can schedule. default value 1: will use 100% cpu
  max-cpu-load-avg: 1
  # master reserved memory, only lower than system available memory, master server can schedule. default value 0.1, only the available memory is higher than 10%, master server can schedule.
  reserved-memory: 0.1
  # failover interval
  failover-interval: 10m
  # kill yarn/k8s application when failover taskInstance, default true
  kill-application-when-task-failover: true
  worker-group-refresh-interval: 10s

worker:
  # worker listener port
  listen-port: 1234
  # worker execute thread number to limit task instances in parallel
  exec-threads: 10
  # worker heartbeat interval
  heartbeat-interval: 10s
  # worker host weight to dispatch tasks, default value 100
  host-weight: 100
  # tenant corresponds to the user of the system, which is used by the worker to submit the job. If system does not have this user, it will be automatically created after the parameter worker.tenant.auto.create is true.
  tenant-auto-create: true
  #Scenes to be used for distributed users.For example,users created by FreeIpa are stored in LDAP.This parameter only applies to Linux, When this parameter is true, worker.tenant.auto.create has no effect and will not automatically create tenants.
  tenant-distributed-user: false
  # worker max cpuload avg, only higher than the system cpu load average, worker server can be dispatched tasks. default value 1: will use 100% cpu.
  max-cpu-load-avg: 1
  # worker reserved memory, only lower than system available memory, worker server can be dispatched tasks. default value 0.1, only the available memory is higher than 10%, worker server can receive task.
  reserved-memory: 0.1
  task-execute-threads-full-policy: REJECT

alert:
  port: 50052
  # Mark each alert of alert server if late after x milliseconds as failed.
  # Define value is (0 = infinite), and alert server would be waiting alert result.
  wait-timeout: 0
  heartbeat-interval: 60s

api:
  audit-enable: false
  # Traffic control, if you turn on this config, the maximum number of request/s will be limited.
  # global max request number per second
  # default tenant-level max request number
  traffic-control:
    global-switch: false
    max-global-qps-rate: 300
    tenant-switch: false
    default-tenant-qps-rate: 10
      #customize-tenant-qps-rate:
    # eg.
    #tenant1: 11
    #tenant2: 20
  python-gateway:
    # Whether enable python gateway server or not. The default value is true.
    enabled: true
    # Authentication token for connection from python api to python gateway server. Should be changed the default value
    # when you deploy in public network.
    auth-token: jwUDzpLsNKEFER4*a8gruBH_GsAurNxU7A@Xc
    # The address of Python gateway server start. Set its value to `0.0.0.0` if your Python API runs on a different
    # host than the Python gateway server. It could be specific to other address like `127.0.0.1` or `localhost`
    gateway-server-address: 0.0.0.0
    # The port of Python gateway server start. Define which port you could connect to Python gateway server from
    # Python API side.
    gateway-server-port: 25333
    # The address of Python callback client.
    python-address: 127.0.0.1
    # The port of Python callback client.
    python-port: 25334
    # Close connection of socket server if no other request accept after x milliseconds. Define value is (0 = infinite),
    # and socket server would never close even though no requests accept
    connect-timeout: 0
    # Close each active connection of socket server if python program not active after x milliseconds. Define value is
    # (0 = infinite), and socket server would never close even though no requests accept
    read-timeout: 0

server:
  port: 12345
  servlet:
    session:
      timeout: 120m
    context-path: /dolphinscheduler/
  compression:
    enabled: true
    mime-types: text/html,text/xml,text/plain,text/css,text/javascript,application/javascript,application/json,application/xml
  jetty:
    max-http-form-post-size: 5000000

management:
  endpoints:
    web:
      exposure:
        include: health,metrics,prometheus
  endpoint:
    health:
      enabled: true
      show-details: always
  health:
    db:
      enabled: true
    defaults:
      enabled: false
  metrics:
    tags:
      application: ${spring.application.name}

metrics:
  enabled: true

# Override by profile
---
spring:
  config:
    activate:
      on-profile: postgresql
  quartz:
    properties:
      org.quartz.jobStore.driverDelegateClass: org.quartz.impl.jdbcjobstore.PostgreSQLDelegate
  datasource:
    driver-class-name: org.postgresql.Driver
    url: jdbc:postgresql://127.0.0.1:5432/dolphinscheduler
    username: root
    password: root

---
spring:
  config:
    activate:
      on-profile: mysql
  sql:
     init:
       schema-locations: classpath:sql/dolphinscheduler_mysql.sql
  datasource:
    driver-class-name: com.mysql.cj.jdbc.Driver
    url: jdbc:mysql://127.0.0.1:3306/dolphinscheduler?useUnicode=true&characterEncoding=UTF-8
    username: root
    password: root

2.3、mysql的配置

spring:
  jackson:
    time-zone: UTC
    date-format: "yyyy-MM-dd HH:mm:ss"
  banner:
    charset: UTF-8
  cache:
    # default enable cache, you can disable by `type: none`
    type: none
    cache-names:
      - tenant
      - user
      - processDefinition
      - processTaskRelation
      - taskDefinition
    caffeine:
      spec: maximumSize=100,expireAfterWrite=300s,recordStats
  sql:
    init:
      schema-locations: classpath:sql/dolphinscheduler_mysql.sql
  datasource:
    #driver-class-name: org.h2.Driver
    #url: jdbc:h2:mem:dolphinscheduler;MODE=MySQL;DB_CLOSE_DELAY=-1;DATABASE_TO_LOWER=true
    #username: sa
    #password: ""
    driver-class-name: com.mysql.cj.jdbc.Driver
    url: jdbc:mysql://127.0.0.1:3306/dolphinscheduler?useUnicode=true&characterEncoding=UTF-8
    username: root
    password: 123456


  quartz:
    job-store-type: jdbc
    jdbc:
      initialize-schema: never
    properties:
      org.quartz.threadPool.threadPriority: 5
      org.quartz.jobStore.isClustered: true
      org.quartz.jobStore.class: org.springframework.scheduling.quartz.LocalDataSourceJobStore
      org.quartz.scheduler.instanceId: AUTO
      org.quartz.jobStore.tablePrefix: QRTZ_
      org.quartz.jobStore.acquireTriggersWithinLock: true
      org.quartz.scheduler.instanceName: DolphinScheduler
      org.quartz.threadPool.class: org.quartz.simpl.SimpleThreadPool
      org.quartz.jobStore.useProperties: false
      org.quartz.threadPool.makeThreadsDaemons: true
      org.quartz.threadPool.threadCount: 25
      org.quartz.jobStore.misfireThreshold: 60000
      org.quartz.scheduler.makeSchedulerThreadDaemon: true
      org.quartz.jobStore.driverDelegateClass: org.quartz.impl.jdbcjobstore.StdJDBCDelegate
      org.quartz.jobStore.clusterCheckinInterval: 5000
      org.quartz.scheduler.batchTriggerAcquisitionMaxCount: 1
  servlet:
    multipart:
      max-file-size: 1024MB
      max-request-size: 1024MB
  messages:
    basename: i18n/messages
  jpa:
    hibernate:
      ddl-auto: none
  mvc:
    pathmatch:
      matching-strategy: ANT_PATH_MATCHER

registry:
  type: zookeeper
  zookeeper:
    namespace: dolphinscheduler
    connect-string: localhost:2181
    retry-policy:
      base-sleep-time: 60ms
      max-sleep: 300ms
      max-retries: 5
    session-timeout: 30s
    connection-timeout: 9s
    block-until-connected: 600ms
    digest: ~

security:
  authentication:
    # Authentication types (supported types: PASSWORD,LDAP,CASDOOR_SSO)
    type: PASSWORD
    # IF you set type `LDAP`, below config will be effective
    ldap:
      # ldap server config
      urls: ldap://ldap.forumsys.com:389/
      base-dn: dc=example,dc=com
      username: cn=read-only-admin,dc=example,dc=com
      password: password
      user:
        # admin userId when you use LDAP login
        admin: read-only-admin
        identity-attribute: uid
        email-attribute: mail
        # action when ldap user is not exist (supported types: CREATE,DENY)
        not-exist-action: CREATE
      ssl:
        enable: false
        # jks file absolute path && password
        trust-store: "/ldapkeystore.jks"
        trust-store-password: ""
    casdoor:
      user:
        admin: admin
    oauth2:
      enable: false
      provider:
        github:
          authorizationUri: "https://github.com/login/oauth/authorize"
          redirectUri: "http://localhost:12345/dolphinscheduler/redirect/login/oauth2"
          clientId: ""
          clientSecret: ""
          tokenUri: "https://github.com/login/oauth/access_token"
          userInfoUri: "https://api.github.com/user"
          callbackUrl: "http://localhost:5173/login"
          iconUri: ""
          provider: github
        gitee:
          authorizationUri: "https://gitee.com/oauth/authorize"
          redirectUri: "http://127.0.0.1:12345/dolphinscheduler/redirect/login/oauth2"
          clientId: ""
          clientSecret: ""
          tokenUri: "https://gitee.com/oauth/token?grant_type=authorization_code"
          userInfoUri: "https://gitee.com/api/v5/user"
          callbackUrl: "http://127.0.0.1:5173/login"
          iconUri: ""
          provider: gitee

casdoor:
  # Your Casdoor server url
  endpoint: http://localhost:8000
  client-id: ""
  client-secret: ""
  # The certificate may be multi-line, you can use `|-` for ease
  certificate: ""
  # Your organization name added in Casdoor
  organization-name: built-in
  # Your application name added in Casdoor
  application-name: dolphinscheduler
  # Dolphinscheduler login url
  redirect-url: http://localhost:5173/login



master:
  listen-port: 5678
  # master fetch command num
  fetch-command-num: 10
  # master prepare execute thread number to limit handle commands in parallel
  pre-exec-threads: 10
  # master execute thread number to limit process instances in parallel
  exec-threads: 10
  # master dispatch task number per batch
  dispatch-task-number: 3
  # master host selector to select a suitable worker, default value: LowerWeight. Optional values include random, round_robin, lower_weight
  host-selector: lower_weight
  # master heartbeat interval
  heartbeat-interval: 10s
  # master commit task retry times
  task-commit-retry-times: 5
  # master commit task interval
  task-commit-interval: 1s
  state-wheel-interval: 5s
  # master max cpuload avg percentage, only higher than the system cpu load average, master server can schedule. default value 1: will use 100% cpu
  max-cpu-load-avg: 1
  # master reserved memory, only lower than system available memory, master server can schedule. default value 0.1, only the available memory is higher than 10%, master server can schedule.
  reserved-memory: 0.1
  # failover interval
  failover-interval: 10m
  # kill yarn/k8s application when failover taskInstance, default true
  kill-application-when-task-failover: true
  worker-group-refresh-interval: 10s

worker:
  # worker listener port
  listen-port: 1234
  # worker execute thread number to limit task instances in parallel
  exec-threads: 10
  # worker heartbeat interval
  heartbeat-interval: 10s
  # worker host weight to dispatch tasks, default value 100
  host-weight: 100
  # tenant corresponds to the user of the system, which is used by the worker to submit the job. If system does not have this user, it will be automatically created after the parameter worker.tenant.auto.create is true.
  tenant-auto-create: true
  #Scenes to be used for distributed users.For example,users created by FreeIpa are stored in LDAP.This parameter only applies to Linux, When this parameter is true, worker.tenant.auto.create has no effect and will not automatically create tenants.
  tenant-distributed-user: false
  # worker max cpuload avg, only higher than the system cpu load average, worker server can be dispatched tasks. default value 1: will use 100% cpu.
  max-cpu-load-avg: 1
  # worker reserved memory, only lower than system available memory, worker server can be dispatched tasks. default value 0.1, only the available memory is higher than 10%, worker server can receive task.
  reserved-memory: 0.1
  task-execute-threads-full-policy: REJECT

alert:
  port: 50052
  # Mark each alert of alert server if late after x milliseconds as failed.
  # Define value is (0 = infinite), and alert server would be waiting alert result.
  wait-timeout: 0
  heartbeat-interval: 60s

api:
  audit-enable: false
  # Traffic control, if you turn on this config, the maximum number of request/s will be limited.
  # global max request number per second
  # default tenant-level max request number
  traffic-control:
    global-switch: false
    max-global-qps-rate: 300
    tenant-switch: false
    default-tenant-qps-rate: 10
      #customize-tenant-qps-rate:
    # eg.
    #tenant1: 11
    #tenant2: 20
  python-gateway:
    # Whether enable python gateway server or not. The default value is true.
    enabled: true
    # Authentication token for connection from python api to python gateway server. Should be changed the default value
    # when you deploy in public network.
    auth-token: jwUDzpLsNKEFER4*a8gruBH_GsAurNxU7A@Xc
    # The address of Python gateway server start. Set its value to `0.0.0.0` if your Python API runs on a different
    # host than the Python gateway server. It could be specific to other address like `127.0.0.1` or `localhost`
    gateway-server-address: 0.0.0.0
    # The port of Python gateway server start. Define which port you could connect to Python gateway server from
    # Python API side.
    gateway-server-port: 25333
    # The address of Python callback client.
    python-address: 127.0.0.1
    # The port of Python callback client.
    python-port: 25334
    # Close connection of socket server if no other request accept after x milliseconds. Define value is (0 = infinite),
    # and socket server would never close even though no requests accept
    connect-timeout: 0
    # Close each active connection of socket server if python program not active after x milliseconds. Define value is
    # (0 = infinite), and socket server would never close even though no requests accept
    read-timeout: 0

server:
  port: 12345
  servlet:
    session:
      timeout: 120m
    context-path: /dolphinscheduler/
  compression:
    enabled: true
    mime-types: text/html,text/xml,text/plain,text/css,text/javascript,application/javascript,application/json,application/xml
  jetty:
    max-http-form-post-size: 5000000

management:
  endpoints:
    web:
      exposure:
        include: health,metrics,prometheus
  endpoint:
    health:
      enabled: true
      show-details: always
  health:
    db:
      enabled: true
    defaults:
      enabled: false
  metrics:
    tags:
      application: ${spring.application.name}

metrics:
  enabled: true

# Override by profile
---
spring:
  config:
    activate:
      on-profile: postgresql
  quartz:
    properties:
      org.quartz.jobStore.driverDelegateClass: org.quartz.impl.jdbcjobstore.PostgreSQLDelegate
  datasource:
    driver-class-name: org.postgresql.Driver
    url: jdbc:postgresql://127.0.0.1:5432/dolphinscheduler
    username: root
    password: root

---
spring:
  config:
    activate:
      on-profile: mysql
  sql:
     init:
       schema-locations: classpath:sql/dolphinscheduler_mysql.sql
  datasource:
    driver-class-name: com.mysql.cj.jdbc.Driver
    url: jdbc:mysql://127.0.0.1:3306/dolphinscheduler?useUnicode=true&characterEncoding=UTF-8
    username: root
    password: root

2.4、dolphinscheduler_env.sh配置

export JAVA_HOME=${JAVA_HOME:-/usr/local/jdk1.8.0_391}
export REGISTRY_ZOOKEEPER_CONNECT_STRING=${REGISTRY_ZOOKEEPER_CONNECT_STRING:-node4:2181}
export DATABASE=${DATABASE:-mysql}
export SPRING_PROFILES_ACTIVE=${DATABASE}
export SPRING_DATASOURCE_URL="jdbc:mysql://192.168.1.103:3306/dolphinscheduler?useUnicode=true&characterEncoding=UTF-8&useSSL=false&allowPublicKeyRetrieval=true"
export SPRING_DATASOURCE_USERNAME=root
export SPRING_DATASOURCE_PASSWORD=123456

三、dolphinscheduler操作

3.1、启动dolphinscheduler

 ./dolphinscheduler-daemon.sh start standalone-server

默认是采用H2的内存数据库,所以任务不能创建太多。

3.2、查看dolphinscheduler状态

[root@node4 bin]# ./dolphinscheduler-daemon.sh status standalone-server
Begin status standalone-server......
standalone-server  [  RUNNING  ]
End status standalone-server.

3.3、停止dolphinscheduler

 ./dolphinscheduler-daemon.sh stop standalone-server

3.4、查看dolphinscheduler进程

[root@node4 bin]# jps
36069 Jps
35849 StandaloneServer

3.5、访问dolphinscheduler

http://192.168.42.142:12345/dolphinscheduler/ui
默认登录用户名/密码:admin/dolphinscheduler123

3.6、访问界面

<think>好的,我现在需要帮助用户解决一个关于部署的问题,涉及多个组件:Seatunnel、Spark、Apache DolphinScheduler 3.2.0MySQL作为源库、Oracle作为目标库,以及JDK 1.8。用户希望得到部署方案。 首先,我需要确认各个组件的版本兼容性。用户提到的DolphinScheduler 3.2.0版本,根据引用内容,确实支持MySQL作为元数据存储,并且单机启动的方式已经验证过[^2]。同时,用户需要将MySQL作为源库,Oracle作为目标库,这意味着数据同步的任务需要配置这两个数据库的连接。 接下来,Seatunnel(之前称为Waterdrop)是一个数据处理工具,支持Spark作为引擎。用户提到使用Spark,所以需要确认Seatunnel的版本是否兼容Spark以及JDK 1.8。一般来说,Seatunnel v2.x与Spark 2.4.x或3.x兼容,但需要检查具体版本要求。此外,JDK 1.8可能没有问题,但需要注意某些新版本组件可能要求更高版本的JDK,这里用户明确要求JDK 1.8,可能需要确认各组件的兼容性。 然后是Apache DolphinScheduler的部署步骤。根据引用[^2],单机启动需要修改元数据存储MySQL,并且复制MySQL连接器到指定目录。用户提供的引用中提到了将mysql-connector-java-8.0.24.jar复制到api-server/libs目录下,这步骤是必要的,否则无法连接MySQL。因此,在部署DolphinScheduler时,需要确保MySQL驱动正确放置,并且配置文件如conf/dao/data_source.properties中的数据库连接信息正确配置。 关于数据同步任务,用户需要从MySQL到Oracle的数据传输。Seatunnel的配置文件中需要定义source和sink部分。MySQL作为源,需要JDBC连接信息,包括URL、用户名、密码、表名等。Oracle作为目标,同样需要JDBC配置,并且需要Oracle的JDBC驱动,比如ojdbc8.jar,应放入Seatunnel的plugins目录。 接下来是调度任务的配置。在DolphinScheduler中创建项目,定义工作流,添加Shell任务来执行Seatunnel的作业。需要确保DolphinScheduler能够调用Seatunnel的命令,并且环境变量如SPARK_HOME、JAVA_HOME正确设置。可能还需要在DolphinScheduler的Env文件中配置这些变量。 另外,组件之间的版本兼容性需要特别注意。例如,DolphinScheduler 3.2.0是否支持Spark的某个版本,以及Seatunnel与Spark的兼容性。此外,Oracle的JDBC驱动版本是否与JDK 1.8兼容,ojdbc8适用于Java 8,所以应该没问题。 用户可能遇到的问题包括:驱动未正确放置导致连接失败,配置文件中的数据库连接参数错误,环境变量未设置导致命令无法执行,或者版本不兼容导致的运行时错误。需要指导用户逐一检查这些方面,并提供日志查看的方法,例如查看DolphinScheduler的日志文件,或Seatunnel执行时的错误输出。 最后,生成相关问题需要考虑用户可能进一步的需求,比如性能优化、高可用部署、监控配置、错误处理以及安全措施等。这些都是部署后可能遇到的后续问题,提前准备这些问题有助于用户深入学习和解决问题。</think>以下为基于Seatunnel、Spark、Apache DolphinScheduler 3.2.0的部署方案(MySQL→Oracle数据同步): ### 一、环境准备 1. **基础组件部署顺序** - 安装JDK 1.8并配置环境变量$JAVA_HOME - 部署MySQL 5.7+/8.0作为DolphinScheduler数据库 - 安装Spark 2.4.x/3.x并配置$SPARK_HOME - 部署Seatunnel v2.3.0+(需适配Spark版本) - 配置Oracle 12c+/19c作为目标库 2. **关键配置项验证** ```bash # 验证Java版本 java -version # 应显示"1.8.0_xxx" # 验证Spark可用性 spark-submit --version ``` ### 二、DolphinScheduler 3.2.0部署 1. 
**元数据存储配置** 修改`conf/dao/data_source.properties`: ```properties spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver spring.datasource.url=jdbc:mysql://your-mysql-host:3306/dolphinscheduler?useUnicode=true&characterEncoding=UTF-8 spring.datasource.username=ds_user spring.datasource.password=your_password ``` 需将`mysql-connector-java-8.0.24.jar`放入`api-server/libs`和`alert-server/libs`目录[^1] 2. **单机启动命令** ```bash ./bin/dolphinscheduler-daemon.sh start standalone-server ``` ### 三、Seatunnel数据同步配置 1. **Oracle驱动准备** 下载`ojdbc8.jar`并放入: ``` seatunnel/plugins/jdbc/lib/ ``` 2. **配置文件示例(mysql_to_oracle.conf)** ```json spark { spark.app.name = "MySQL2Oracle" spark.executor.instances = 2 spark.executor.cores = 2 spark.executor.memory = "2g" } source { jdbc { driver = "com.mysql.cj.jdbc.Driver" url = "jdbc:mysql://mysql-host:3306/source_db" username = "src_user" password = "src_pwd" table = "source_table" } } transform { # 可添加字段映射或过滤规则 } sink { jdbc { driver = "oracle.jdbc.OracleDriver" url = "jdbc:oracle:thin:@//oracle-host:1521/ORCL" username = "tgt_user" password = "tgt_pwd" table = "target_table" save_mode = "overwrite" } } ``` ### 四、任务调度集成 1. **DolphinScheduler任务配置** - 创建项目 → 新建工作流 → 添加Shell任务 ```shell #!/bin/bash $SEATUNNEL_HOME/bin/start-seatunnel-spark.sh \ --config $CONFIG_PATH/mysql_to_oracle.conf \ --deploy-mode client \ --master yarn ``` 2. **环境变量配置** 在`bin/env/dolphinscheduler_env.sh`中添加: ```shell export SPARK_HOME=/opt/spark export SEATUNNEL_HOME=/opt/seatunnel export PATH=$JAVA_HOME/bin:$SPARK_HOME/bin:$PATH ``` ### 五、验证流程 1. **执行顺序验证** MySQL数据库启动DolphinScheduler服务启动 → Spark集群就绪 → 执行同步任务 2. **故障排查点** - 检查`logs/api-server.log`中的数据库连接状态[^2] - 验证Oracle TNSPING连通性 - 查看Seatunnel的`seatunnel-spark.log`
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值