
def lib::cmdlib::LUReplaceDisks::_ExecD8Secondary ( self, feedback_fn ) [private]

Replace the secondary node for drbd8.

The algorithm for replace is quite complicated:
  - for all disks of the instance:
    - create new LVs on the new node with same names
    - shutdown the drbd device on the old secondary
    - disconnect the drbd network on the primary
    - create the drbd device on the new secondary
    - network attach the drbd on the primary, using an artifice:
      the drbd code for Attach() will connect to the network if it
      finds a device which is connected to the good local disks but
      not network enabled
  - wait for sync across all devices
  - remove all disks from the old secondary

Failures are not very well handled.
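
A condensed sketch of the network detach/re-attach trick described above (the
"artifice"), for illustration only; the full implementation follows below. It
reuses the helpers visible in the listing (cfg.SetDiskID, rpc.call_blockdev_find),
and "disks", "pri_node" and "new_node" stand in for instance.disks, the primary
node and the new secondary node.

    # detach: blank the physical id, then re-run find(); the DRBD device on
    # the primary drops its network connection and becomes standalone
    for dev in disks:
      cfg.SetDiskID(dev, pri_node)
      dev.physical_id = (None,) * len(dev.physical_id)
      rpc.call_blockdev_find(pri_node, dev)

    # re-attach: once logical_id points at the new secondary, SetDiskID()
    # computes a correct physical id again; find() then sees a device with
    # good local disks but no network and re-enables the connection
    for dev in disks:
      dev.logical_id = (pri_node, new_node) + dev.logical_id[2:]
      cfg.SetDiskID(dev, pri_node)
      rpc.call_blockdev_find(pri_node, dev)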

Definition at line 3666 of file cmdlib.py.

03666   def _ExecD8Secondary(self, feedback_fn):
    """Replace the secondary node for drbd8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
    instance = self.instance
    iv_names = {}
    vgname = self.cfg.GetVGName()
    # start of work
    cfg = self.cfg
    old_node = self.tgt_node
    new_node = self.new_node
    pri_node = instance.primary_node

    # Step: check device activation
    self.proc.LogStep(1, steps_total, "check device existence")
    info("checking volume groups")
    my_vg = cfg.GetVGName()
    results = rpc.call_vg_list([pri_node, new_node])
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")
    for node in pri_node, new_node:
      res = results.get(node, False)
      if not res or my_vg not in res:
        raise errors.OpExecError("Volume group '%s' not found on %s" %
                                 (my_vg, node))
    for dev in instance.disks:
      if not dev.iv_name in self.op.disks:
        continue
      info("checking %s on %s" % (dev.iv_name, pri_node))
      cfg.SetDiskID(dev, pri_node)
      if not rpc.call_blockdev_find(pri_node, dev):
        raise errors.OpExecError("Can't find device %s on node %s" %
                                 (dev.iv_name, pri_node))

    # Step: check other node consistency
    self.proc.LogStep(2, steps_total, "check peer consistency")
    for dev in instance.disks:
      if not dev.iv_name in self.op.disks:
        continue
      info("checking %s consistency on %s" % (dev.iv_name, pri_node))
      if not _CheckDiskConsistency(self.cfg, dev, pri_node, True, ldisk=True):
        raise errors.OpExecError("Primary node (%s) has degraded storage,"
                                 " unsafe to replace the secondary" %
                                 pri_node)

    # Step: create new storage
    self.proc.LogStep(3, steps_total, "allocate new storage")
    for dev in instance.disks:
      size = dev.size
      info("adding new local storage on %s for %s" % (new_node, dev.iv_name))
      # since we *always* want to create this LV, we use the
      # _Create...OnPrimary (which forces the creation), even if we
      # are talking about the secondary node
      for new_lv in dev.children:
        if not _CreateBlockDevOnPrimary(cfg, new_node, instance, new_lv,
                                        _GetInstanceInfoText(instance)):
          raise errors.OpExecError("Failed to create new LV named '%s' on"
                                   " node '%s'" %
                                   (new_lv.logical_id[1], new_node))

      iv_names[dev.iv_name] = (dev, dev.children)

    self.proc.LogStep(4, steps_total, "changing drbd configuration")
    for dev in instance.disks:
      size = dev.size
      info("activating a new drbd on %s for %s" % (new_node, dev.iv_name))
      # create new devices on new_node
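      # the new DRBD pairs the primary with the new secondary node; it reuses
      # the tail of the old logical_id and the same child LVs whose copies
      # were just created on new_node in the previous step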
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=(pri_node, new_node,
                                          dev.logical_id[2]),
                              children=dev.children)
      if not _CreateBlockDevOnSecondary(cfg, new_node, instance,
                                        new_drbd, False,
                                        _GetInstanceInfoText(instance)):
        raise errors.OpExecError("Failed to create new DRBD on"
                                 " node '%s'" % new_node)

    for dev in instance.disks:
      # we have new devices, shutdown the drbd on the old secondary
      info("shutting down drbd for %s on old node" % dev.iv_name)
      cfg.SetDiskID(dev, old_node)
      if not rpc.call_blockdev_shutdown(old_node, dev):
        warning("Failed to shutdown drbd for %s on old node" % dev.iv_name,
                "Please cleanup this device manually as soon as possible")

    info("detaching primary drbds from the network (=> standalone)")
    done = 0
    for dev in instance.disks:
      cfg.SetDiskID(dev, pri_node)
      # set the physical (unique in bdev terms) id to None, meaning
      # detach from network
      dev.physical_id = (None,) * len(dev.physical_id)
      # and 'find' the device, which will 'fix' it to match the
      # standalone state
      if rpc.call_blockdev_find(pri_node, dev):
        done += 1
      else:
        warning("Failed to detach drbd %s from network, unusual case" %
                dev.iv_name)

    if not done:
      # no detaches succeeded (very unlikely)
      raise errors.OpExecError("Can't detach at least one DRBD from old node")

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    info("updating instance configuration")
    for dev in instance.disks:
      dev.logical_id = (pri_node, new_node) + dev.logical_id[2:]
      cfg.SetDiskID(dev, pri_node)
    cfg.Update(instance)

    # and now perform the drbd attach
    info("attaching primary drbds to new secondary (standalone => connected)")
    failures = []
    for dev in instance.disks:
      info("attaching primary drbd for %s to new secondary node" % dev.iv_name)
      # since the attach is smart, it's enough to 'find' the device,
      # it will automatically activate the network, if the physical_id
      # is correct
      cfg.SetDiskID(dev, pri_node)
      if not rpc.call_blockdev_find(pri_node, dev):
        warning("can't attach drbd %s to new secondary!" % dev.iv_name,
                "please do a gnt-instance info to see the status of disks")

    # this can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its
    # return value
    self.proc.LogStep(5, steps_total, "sync devices")
    _WaitForSync(cfg, instance, self.proc, unlock=True)

    # so check manually all the devices
    for name, (dev, old_lvs) in iv_names.iteritems():
      cfg.SetDiskID(dev, pri_node)
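      # field 5 of the find() result is the is_degraded flag of the device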
      is_degr = rpc.call_blockdev_find(pri_node, dev)[5]
      if is_degr:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

    self.proc.LogStep(6, steps_total, "removing old storage")
    for name, (dev, old_lvs) in iv_names.iteritems():
      info("remove logical volumes for %s" % name)
      for lv in old_lvs:
        cfg.SetDiskID(lv, old_node)
        if not rpc.call_blockdev_remove(old_node, lv):
          warning("Can't remove LV on old secondary",
                  "Cleanup stale volumes by hand")
