全国服务热线:400-6263-721

位置:南昌达内IT教育培训学校 > 学校动态 > Spark任务和集群启动流程

Spark任务和集群启动流程

来源:南昌达内IT教育培训学校时间:2022/5/11 17:13:28

  1.调用start-all.sh脚本,开始启动Master

  2.Master启动以后,preStart方法调用了一个定时器,定时检查超时的Worker后删除

  3.启动脚本会解析slaves配置文件,找到启动Worker的相应节点.开始启动Worker

  4.Worker服务启动后开始调用preStart方法开始向所有的Master进行注册

  5.Master接收到Worker发送过来的注册信息,Master开始保存注册信息并把自己的URL响应给Worker

  6.Worker接收到Master的URL后并更新,开始调用一个定时器,定时的向Master发送心跳信息

  任务提交流程

  1.Driver端会通过spark-submit脚本启动SaparkSubmit进程,此时创建了一个非常重要的对象(SparkContext),开始向Master发送消息

  2.Master接收到发送过来的信息后开始生成任务信息,并把任务信息放到一个对列里

  3.Master把所有有效的Worker过滤出来,按照空闲的资源进行排序

  4.Master开始向有效的Worker通知拿取任务信息并启动相应的Executor

  5.Worker启动Executor并向Driver反向注册

  6.Driver开始把生成的task发送给相应的Executor,Executor开始执行任务

  集群启动流程

  1.首先创建Master类

  import akka.actor.{Actor, ActorSystem, Props}

  import com.typesafe.config.{Config, ConfigFactory}

  import scala.collection.mutable

  import scala.concurrent.duration._

  class Master(val masterHost: String, val masterPort: Int) extends Actor{

  // 用来存储Worker的注册信息

  val idToWorker = new mutable.HashMap[String, WorkerInfo]()

  // 用来存储Worker的信息

  val workers = new mutable.HashSet[WorkerInfo]()

  // Worker的超时时间间隔

  val checkInterval: Long = 15000

  // 生命周期方法,在构造器之后,receive方法之前只调用一次

  override def preStart(): Unit = {

  // 启动一个定时器,用来定时检查超时的Worker

  import context.dispatcher

  context.system.scheduler.schedule(0 millis, checkInterval millis, self, CheckTimeOutWorker)

  }

  // 在preStart方法之后,不断的重复调用

  override def receive: Receive = {

  // Worker -> Master

  case RegisterWorker(id, host, port, memory, cores) => {

  if (!idToWorker.contains(id)){

  val workerInfo = new WorkerInfo(id, host, port, memory, cores)

  idToWorker += (id -> workerInfo)

  workers += workerInfo

  println("a worker registered")

  sender ! RegisteredWorker(s"akka.tcp://${Master.MASTER_SYSTEM}" +

  s"@${masterHost}:${masterPort}/user/${Master.MASTER_ACTOR}")

  }

  }

  case HeartBeat(workerId) => {

  // 通过传过来的workerId获取对应的WorkerInfo

  val workerInfo: WorkerInfo = idToWorker(workerId)

  // 获取当前时间

  val currentTime = System.currentTimeMillis()

  // 更新较后一次心跳时间

  workerInfo.lastHeartbeatTime = currentTime

  }

  case CheckTimeOutWorker => {

  val currentTime = System.currentTimeMillis()

  val toRemove: mutable.HashSet[WorkerInfo] =

  workers.filter(w => currentTime - w.lastHeartbeatTime > checkInterval)

  // 将超时的Worker从idToWorker和workers中移除

  toRemove.foreach(deadWorker => {

  idToWorker -= deadWorker.id

  workers -= deadWorker

  })

  println(s"num of workers: ${workers.size}")

  }

  }

  }

  object Master{

  val MASTER_SYSTEM = "MasterSystem"

  val MASTER_ACTOR = "Master"

  def main(args: Array[String]): Unit = {

  val host = args(0)

  val port = args(1).toInt

  val configStr =

  s"""

  |akka.actor.provider = "akka.remote.RemoteActorRefProvider"

  |akka.remote.netty.tcp.hostname = "$host"

  |akka.remote.netty.tcp.port = "$port"

  """.stripMargin

  // 配置创建Actor需要的配置信息

  val config: Config = ConfigFactory.parseString(configStr)

  // 创建ActorSystem

  val actorSystem: ActorSystem = ActorSystem(MASTER_SYSTEM, config)

  // 用actorSystem实例创建Actor

  actorSystem.actorOf(Props(new Master(host, port)), MASTER_ACTOR)

  actorSystem.awaitTermination()

  }

  }

  2.创建RemoteMsg特质

  trait RemoteMsg extends Serializable{

  }

  // Master -> self(Master)

  case object CheckTimeOutWorker

  // Worker -> Master

  case class RegisterWorker(id: String, host: String,

  port: Int, memory: Int, cores: Int) extends RemoteMsg

  // Master -> Worker

  case class RegisteredWorker(masterUrl: String) extends RemoteMsg

  // Worker -> self

  case object SendHeartBeat

  // Worker -> Master(HeartBeat)

  case class HeartBeat(workerId: String) extends RemoteMsg

  3.创建Worker类

  import java.util.UUID

  import akka.actor.{Actor, ActorRef, ActorSelection, ActorSystem, Props}

  import com.typesafe.config.{Config, ConfigFactory}

  import scala.concurrent.duration._

  class Worker(val host: String, val port: Int, val masterHost: String,

  val masterPort: Int, val memory: Int, val cores: Int) extends Actor{

  // 生成一个Worker ID

  val workerId = UUID.randomUUID().toString

  // 用来存储MasterURL

  var masterUrl: String = _

  // 心跳时间间隔

  val heartBeat_interval: Long = 10000

  // master的Actor

  var master: ActorSelection = _

  override def preStart(){

  // 获取Master的Actor

  master = context.actorSelection(s"akka.tcp://${Master.MASTER_SYSTEM}" +

  s"@${masterHost}:${masterPort}/user/${Master.MASTER_ACTOR}")

  master ! RegisterWorker(workerId, host, port, memory, cores)

  }

  override def receive: Receive = {

  // Worker接收到Master发送过来的注册成功的信息(masterUrl)

  case RegisteredWorker(masterUrl) => {

  this.masterUrl = masterUrl

  // 启动一个定时器,定时给Master发送心跳

  import context.dispatcher

  context.system.scheduler.schedule(0 millis, heartBeat_interval millis, self, SendHeartBeat)

  }

  case SendHeartBeat => {

  // 向Master发送心跳

  master ! HeartBeat(workerId)

  }

  }

  }

  object Worker{

  val WORKER_SYSTEM = "WorkerSystem"

  val WORKER_ACTOR = "Worker"

  def main(args: Array[String]): Unit = {

  val host = args(0)

  val port = args(1).toInt

  val masterHost = args(2)

  val masterPort = args(3).toInt

  val memory = args(4).toInt

  val cores = args(5).toInt

  val configStr =

  s"""

  |akka.actor.provider = "akka.remote.RemoteActorRefProvider"

  |akka.remote.netty.tcp.hostname = "$host"

  |akka.remote.netty.tcp.port = "$port"

  """.stripMargin

  // 配置创建Actor需要的配置信息

  val config: Config = ConfigFactory.parseString(configStr)

  // 创建ActorSystem

  val actorSystem: ActorSystem = ActorSystem(WORKER_SYSTEM, config)

  // 用actorSystem实例创建Actor

  val worker: ActorRef = actorSystem.actorOf(

  Props(new Worker(host, port, masterHost, masterPort, memory, cores)), WORKER_ACTOR)

  actorSystem.awaitTermination()

  }

  }

  4.创建初始化类

  class WorkerInfo(val id: String, val host: String, val port: Int,

  val memory: Int, val cores: Int) {

  // 初始化较后一次心跳的时间

  var lastHeartbeatTime: Long = _

  }

  5.本地测试需要传入参数

领取试听课
每天限量名额,先到先得

尊重原创文章,转载请注明出处与链接:http://www.peixun360.com/1810/news/523452/违者必究! 以上就是南昌达内IT教育培训学校 小编为您整理 Spark任务和集群启动流程的全部内容。

温馨提示:提交留言后老师会第一时间与您联系!热线电话:400-6263-721