用python实现vmware esxi虚拟机断电自动关机

用python实现vmware esxi虚拟机断电自动关机

一、前言

机房服务器一般配备UPS,当市电停掉时,仍然能维持一段时间。但是,如果长时间停电,UPS也撑不了。此时就需要及时关机以确保数据安全。很多年以前用过winpower软件,通过在目标系统安装agent,并用winPower连接UPS,当UPS电量低于设置的阈值时,给各个agent下发关机指令,以实现自动关机。这种方式可以充分利用UPS电量,较为精准地判断电池续航情况,并采取相应动作,但是操作起来有点麻烦。

另一种方式是采用自动化脚本的方式,通过循环ping 某个常接市电设备ip,当失败超过一定次数,触发powerOffVMs,最后shutdown esxi系统。这种方式无需与UPS通讯,只要市电断开,超过一段时间就执行关机指令,操作起来比较方便。但是,这种方式可能存在误判,比如交换机挂了,此时ping设备必然失败,最终会触发关机。因此下面介绍一种更为稳妥的方案。

二、实现

  1. 原理

    利用 esxcli hardware ipmi sdr list | grep "Power\ Supply\ 1\ Status\|Power\ Supply\ 2\ Status" 来检测电源是否正常(脚本中实际调用的是localcli)。当其中一路电源断开或异常时,调用powerOffVms.py关闭各虚拟机,最后再调用poweroff 来关闭esxi。

    其实还可以利用snmp连接到idrac,也是通过检查电源状态来实现自动监控,但是这种方式又依赖网络了,跟上面提到的方式二差不多。

    image-20231215002800664

==注意,这里默认服务器都具备双路电源,如果只有一路电源,那不适用此方法。==

  2. 代码

    源码如下(有点啰嗦,几年前写的,将就看吧),racadm是为了兼容R710,因为R710的esxcli 指令不支持获取电源状态。执行python脚本后,会修改/etc/rc.local.d/local.sh以实现开机自动执行脚本。本例中powerOffDelay是指断电20分钟后就触发自动关机指令。但如果断电期间又恢复供电,则会重新计时。

    本代码在R710至R750上测试通过,不代表你的也可以,仅作参考 ,请根据自己实际情况决定是否使用。
    #!/usr/bin/python
    from genericpath import exists, isfile
    import os
    import time
    import subprocess
    import fileinput
    import re
    import sys
    # version 20190916  usage: add python autoShutdown.py to  /etc/rc.local.d/local.sh   and uncomment  
    # by root6819 Q:302777528 site: www.qipanet.com
    # Directory holding this script; every helper file is looked up next to it.
    basePath = os.path.dirname(os.path.realpath(__file__))
    # Script executed by shutDownAll() before the host itself is powered off.
    fPowerOff = '{0}/powerOffVms_py3.0.py'.format(basePath)
    # Offline bundle passed to "esxcli software vib install -d" when racadm is missing.
    fRacadm = '{0}/Racadm-Dell-EMC-Web-9.3.0-3379.VIB-ESX67i.zip'.format(basePath)
    # Boot script that must launch this monitor, and where its backup copy goes.
    fLocalOld = '/etc/rc.local.d/local.sh'
    fLocalNew = '{0}/local.sh.bak'.format(basePath)
    # Seconds between two power checks; must be >= 5 because a check itself takes time.
    interval = 15
    # Seconds of continuous power-supply failure after which the host is shut down (20 minutes).
    powerOffDelay = 20 * 60
    # Consecutive failed checks; reset to 0 when a check succeeds.
    # Shutdown is triggered once iFailTimes * interval reaches powerOffDelay.
    iFailTimes = 0
    # Platform string used to tell whether the host is an R710.
    machine = ""
    def getMachine():
        """Detect whether the host is an R710 and store the result in ``machine``.

        Runs ``localcli hardware platform get | grep R710`` and saves the
        stripped output in the module-level ``machine`` variable, which
        ``checkPS`` inspects to pick the racadm code path. The output is empty
        when grep finds no matching line.
        """
        # Without this declaration the assignment below only created a local
        # variable and the module-level value stayed empty forever.
        global machine
        cmd = 'localcli hardware platform get |grep R710'
        machine = os.popen(cmd).read().strip()
        showMsg("the machine is R710 ?" + machine)
    
    def modify_file(file_name, pattern, value=""):
        """Insert ``value`` in front of every line of ``file_name`` matching ``pattern``.

        The file is rewritten in place through ``fileinput``; while it is open,
        everything written to ``sys.stdout`` lands in the file.

        Args:
            file_name: path of the file to edit.
            pattern: regular expression searched for in each line.
            value: text written verbatim immediately before a matching line;
                include the trailing newline if it should be its own line.

        Returns:
            True when the file was rewritten, False when an error occurred
            (the error is logged).
        """
        writeLog('now modify file '+file_name+'...')
        try:
            # The context manager closes fileinput on every path, which restores
            # sys.stdout; otherwise the error log below would be printed into
            # the file being edited.
            with fileinput.input(file_name, inplace=True) as fh:
                for line in fh:
                    # Write value and the untouched line separately instead of
                    # using re.sub with "value + line" as replacement: that
                    # duplicated the rest of the line after the match and
                    # interpreted backslashes in value as regex escapes.
                    if re.search(pattern, line):
                        sys.stdout.write(value)
                    sys.stdout.write(line)
        except Exception as e:
            writeLog(e)
            return False
        writeLog('write Ok')
        return True
    
    
    def myPopen(cmd):
        """Run ``cmd`` through the shell and return ``(stdout, stderr)`` as text.

        Both streams are captured completely and decoded as UTF-8; bytes that
        cannot be decoded are dropped.
        """
        proc = subprocess.Popen(
            cmd,
            shell=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        raw_out, raw_err = proc.communicate()
        text_out = raw_out.decode('utf-8', 'ignore')
        text_err = raw_err.decode('utf-8', 'ignore')
        return text_out, text_err
    
    
    def writeLog(msg):
        """Print ``msg`` and append it, timestamped, to ``Log/log.txt``.

        The ``Log`` directory is created next to the script when it does not
        exist yet. ``msg`` may be any object; it is rendered with ``str.format``.
        """
        path = basePath+"/Log"
        os.makedirs(path, exist_ok=True)
        showMsg(msg)
        strMsg = '[{0}]: {1} \n'.format(time.strftime('%Y-%m-%d %H:%M:%S'), msg)
        # "with" guarantees the handle is closed even when the write fails.
        with open(path+"/log.txt", mode='a') as f:
            f.write(strMsg)
    
    
    def showMsg(msg):
        """Print ``msg`` to stdout, prefixed with the current local timestamp."""
        stamp = time.strftime('%Y-%m-%d %H:%M:%S')
        print('[{0}]: {1} \n'.format(stamp, msg))
    
    
    def checkLocalSh():
        """Tell whether the boot script already launches this monitor.

        Returns:
            True when ``fLocalOld`` contains ``autoShutdown3.0.py &``; False
            when it does not, or when the file cannot be opened or read (the
            error is logged).
        """
        try:
            with open(fLocalOld, mode='r+') as handle:
                content = handle.read()
        except Exception as e:
            writeLog('checkLocalShErr>>')
            writeLog(e)
            return False
        # Expected entry looks like:
        # python /vmfs/volumes/datastore_ssd/tools/autoShutdown.py &
        return 'autoShutdown3.0.py &' in content
    
    
    def shutDownAll():
        """Run the VM power-off script, log its output, then power off the host."""
        # ESXi 6.7 ships an old powerOffVms script, hence the rewritten copy
        # referenced by fPowerOff is executed instead.
        script = fPowerOff
        writeLog('powerOffVms path>>%s ' % script)
        # Reading to EOF blocks until the script has closed its output.
        output = os.popen(script).read()
        writeLog('powerOffVms result>>%s ' % output)
        writeLog('now poweroff')
        os.popen('poweroff')
    
    
    def checkPS():
        """Report whether the power supplies look healthy.

        On an R710 (``machine`` contains ``R710``) the number of ``PS ... AC``
        sensors reported as ``Present`` by racadm must be exactly 2.

        On other machines the IPMI sensor list is read through ``localcli``.
        A reading without the word ``detected`` is treated as an empty or
        failed reading and is retried until one arrives. The supplies are
        considered unhealthy as soon as the word ``lost`` appears.

        Returns:
            True when power is healthy, False otherwise.
        """
        if 'R710' in machine:
            cmd = 'racadm getsensorinfo |grep  ^PS.*AC |grep -c Present'
            return os.popen(cmd).read().strip() == '2'

        # Sensor text is expected to say "Presence detected" or
        # "Power Supply AC lost". The raw string keeps the backslashes for grep
        # without relying on invalid Python escape sequences.
        cmd = r'localcli hardware  ipmi sdr list | grep "Power\ Supply\ 1\ Status\|Power\ Supply\ 2\ Status"'
        while True:
            result = os.popen(cmd).read().strip().lower()
            # At least one supply must be reported as detected for the reading
            # to be usable.
            if 'detected' in result:
                break
            showMsg(str(iFailTimes)+'checkPS() result null,check again...')
            # Retry in a loop rather than recursively: a persistently empty
            # reading used to end in RecursionError and kill the monitor. The
            # short pause avoids spawning processes back to back.
            time.sleep(1)

        if 'lost' in result:
            showMsg(str(iFailTimes)+'times checkPs not OK>>'+result)
            return False
        return True
    
    def checkRacadm():
        """Check that the ``racadm`` tool can be run on this host.

        Returns:
            False when the shell reports the command as missing (the English
            or the Chinese "not found" message appears on stderr), True
            otherwise. The outcome is logged either way.
        """
        version_out, version_err = myPopen('racadm version |grep  -w version')

        missing_markers = ('not found', '不是内部或外部命令')
        if any(marker in version_err for marker in missing_markers):
            writeLog('racadm tool not install,pls install it first!')
            return False

        writeLog('racadm check oK,the Version is>>'+version_out)
        return True
    
    
    # Make sure racadm is available; on first run try to install it from the
    # offline bundle that sits next to this script.
    if not checkRacadm():
        if not os.path.exists(fRacadm):
            writeLog(fRacadm + ' file not exists,could not auto instal!')
            # sys.exit instead of the site-provided exit() helper, which is
            # meant for interactive use.
            sys.exit()
        writeLog(fRacadm + ' try auto install...')
        _result, _err = myPopen('esxcli software vib install -d '+fRacadm)
        writeLog('install result>>'+_result)
        # Plain truthiness test; comparing with "is not ''" checks identity,
        # not content.
        if _err:
            writeLog('install _err>>'+_err)

        # Verify the installation; give up when racadm is still missing.
        if not checkRacadm():
            sys.exit()
    
    # Register this monitor in the boot script so that it starts with the host.
    if not checkLocalSh():
        # Back up the boot script before touching it.
        _result, _err = myPopen('cp -f '+fLocalOld+' ' + fLocalNew)
        writeLog('cp Result>>'+_result)
        # Plain truthiness test; comparing with "is not ''" checks identity,
        # not content.
        if _err:
            writeLog('cp err>>'+_err)
            sys.exit()
        # The launch line is inserted in front of the "exit 0" line.
        _str = 'python '+basePath+'/autoShutdown3.0.py &\n'
        if not modify_file(fLocalOld, 'exit 0', _str):
            sys.exit()
        # Verify that the launch line really made it into the boot script.
        if not checkLocalSh():
            writeLog('checkLocalSh twice failed,exit now!')
            sys.exit()

    writeLog('checkLocalSh Ok!')
    
    # Refuse to start monitoring when the VM power-off script is missing.
    # Check fPowerOff itself, the path shutDownAll() executes, rather than
    # rebuilding the same path a second time.
    _fPath = fPowerOff
    if not os.path.exists(_fPath):
        writeLog(_fPath+' file not exists,pls confirm!')
        sys.exit()
    writeLog(_fPath+' check file OK!')
    
    # Main monitor: poll the power supplies every `interval` seconds and shut
    # everything down once failures have lasted for `powerOffDelay` seconds.
    getMachine()
    # True while the next successful check should be logged (at start-up and
    # after every failure), so that the log is not flooded with "OK" lines.
    isFirst = True
    while iFailTimes*interval < powerOffDelay:
        _started = time.monotonic()
        if checkPS():
            # Power is back or still fine: restart the countdown.
            iFailTimes = 0
            if isFirst:
                isFirst = False
                writeLog('checkPS OK')
        else:
            isFirst = True
            iFailTimes += 1
            # Log only on the second consecutive failure.
            if iFailTimes == 2:
                writeLog('checkPS failed,maybe a power supply is lost!!!')

        # Sleep for what is left of the interval. The time the check really
        # took is measured instead of assuming 3 seconds, and the value is
        # clamped so that a slow check or a small interval cannot make
        # time.sleep raise ValueError on a negative argument.
        time.sleep(max(0, interval - (time.monotonic() - _started)))

    shutDownAll()
    

评论已关闭