selenium编写爬虫实现窗口切换和操作iframe内网页元素

ixiaoye 类库工具 2020-04-03 1185 0 selenium

selenium使用过程记录,主要记录窗体切换、查询标题、关闭窗体,以及iframe和主文档之间的切换,还有xlrd、xlwt的简单使用等。

irobot.browser.switch_to.window(irobot.browser.window_handles[0]) #切换回第一个窗体
irobot.browser.find_element_by_css_selector('iframe[src*="SrProblemProcessSimpleQuery"]') #定位iframe
irobot.browser.switch_to.default_content() #返回默认的文档
import datetime
import random
import xlrd
import time
import xlwt
from xlutils.copy import copy
from selenium.webdriver import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from sys import argv
from selenium.webdriver.common.keys import Keys


if __name__ == '__main__':
    # Script entry point. Phase 1 polls the browser until the operator has
    # logged in and the work-order query iframe is present. Phase 2 looks up
    # every work-order id from the input workbook and records whether its
    # detail page shows an evaluation result.
    #
    # NOTE(review): `robot` is not defined in the visible part of this file;
    # it is presumably a Selenium wrapper exposing `.browser` (a WebDriver)
    # plus `getElementByCssSelecter` / `getElementsByCssSelecter` helpers --
    # confirm against its definition.

    # Base delay in seconds; the first command-line argument overrides it.
    sleepTime = 2
    if len(argv) > 1:
        sleepTime = int(argv[1])

    inexcelPath = 'D:/robot/in.xlsx'
    # The output workbook is named after today's date (YYYYMMDD.xls).
    outexcelPath = 'D:/robot/' + datetime.datetime.now().strftime('%Y%m%d') + '.xls'
    sheetName = '明细'
    chromeDriver = 'd:/robot/chromedriver.exe'
    print('-->请将excel表格放到D盘robot文件夹下改名为“in.xlsx”,sheet表命名为“明细”')
    print('-->浏览器驱动chromedriver.exe也放于robot文件夹')
    print('-->发生错误时再次运行时,生成的out.xls会被覆盖,且in.xlsx会从头开始')
    print('-->在公司环境下运行本工具')

    # NOTE(review): debugAddr below suggests Chrome is expected to be running
    # already with --remote-debugging-port=9222 -- confirm against robot().
    proxy = 'cmproxy.gmcc.net:8081'
    irobot = robot(driverPath=chromeDriver, needProxy=False, proxy=proxy,
                   debugAddr='127.0.0.1:9222', headless=False)
    print('-->已设置代理为:' + proxy)

    # ---- Phase 1: wait until the work-order query page is reachable ----
    # stateLabel tracks progress through the login flow:
    #   0 -> 1: the portal page (title '新一代客服系统') has been seen
    #   1 -> 2: title became empty; the current window is closed
    #   2 -> 3: title is empty again on a later pass
    stateLabel = 0
    while True:
        title = ''
        time.sleep(sleepTime / 4)
        try:
            title = irobot.browser.title
        except Exception:
            # Reading the title failed (presumably the current window is
            # gone): switch to the first window handle and retry once.
            irobot.browser.switch_to.window(irobot.browser.window_handles[0])
            try:
                title = irobot.browser.title
            except Exception:
                print('-->再次获取标题失败')
            print('-->正在切换当前窗口')

        if stateLabel == 0 and title == '新一代客服系统':
            stateLabel = 1  # reached the login URL
        # The 2 -> 3 check comes before the 1 -> 2 check, so a single pass
        # can advance the state by at most one step.
        if stateLabel == 2 and not title:
            stateLabel = 3  # login clicked, page is redirecting

        if stateLabel == 1 and not title:
            stateLabel = 2  # entered the login page
            irobot.browser.close()
            time.sleep(sleepTime / 2)
        try:
            # The query iframe being present means login has completed.
            if irobot.browser.find_element_by_css_selector('iframe[src*="SrProblemProcessSimpleQuery"]'):
                break
        except Exception:
            print('-->正在等待登录后的界面,请登录后进入工单查询界面...')

    print('-->登录成功,开始查询工单')

    # ---- Phase 2: query each work order and record the result ----
    currentRow = 0
    wb = xlrd.open_workbook(inexcelPath)
    sheet = wb.sheet_by_name(sheetName)  # sheet holding the work-order ids
    ids = sheet.col_values(0)
    ids.pop(0)  # drop the first cell of the column (header row)

    # Create the output workbook with a header row and save it immediately,
    # so the loop below can re-open it on every iteration.
    outwb = xlwt.Workbook(encoding='gb2312')
    worksheet = outwb.add_sheet('查询结果')
    worksheet.write(currentRow, 0, '工单号')
    worksheet.write(currentRow, 1, '是否有评测结果')
    worksheet.write(currentRow, 2, '评测结果')
    outwb.save(outexcelPath)

    currentRow += 1

    for ida in ids:
        try:
            # Re-open the saved output and work on a writable copy, so each
            # processed id is persisted by the save at the end of the pass.
            wb = xlrd.open_workbook(outexcelPath)
            wb1 = copy(wb)
            worksheet = wb1.get_sheet(0)

            # Enter the query iframe; on failure fall back to the top-level
            # document and let the lookups below decide whether to abort.
            try:
                iframe1 = irobot.browser.find_element_by_xpath("//iframe[contains(@src,'SrProblemProcessSimpleQuery')]")
                if iframe1:
                    irobot.browser.switch_to.frame(iframe1)
                    time.sleep(sleepTime / 2)
            except Exception:
                print('无法获取目标iframe-"工单查询页面"')
                irobot.browser.switch_to.default_content()

            idenInputEle = irobot.getElementByCssSelecter('input[name=wrkfmShowSwftno]')
            checkBtnEle = irobot.getElementByCssSelecter('#SearchBtn')
            # Select the existing text so the new id overwrites it.
            idenInputEle.send_keys(Keys.CONTROL + 'a')
            time.sleep(sleepTime / 2)
            idenInputEle.send_keys(ida)
            time.sleep(sleepTime / 2)
            checkBtnEle.click()
            time.sleep(sleepTime / 2)

            # Click the <u> element inside the first result cell whose text
            # contains the id.
            resultCells = irobot.getElementsByCssSelecter('.evenTr td')
            for cell in resultCells:
                time.sleep(sleepTime / 10)
                if ida in cell.text:
                    irobot.getElementByCssSelecter('u', parentElement=cell).click()
                    break

            irobot.browser.switch_to.default_content()  # back to the top-level document
            time.sleep(sleepTime)
            iframe2 = irobot.browser.find_element_by_xpath("//iframe[contains(@src,'srProblemProcessDetail')]")
            irobot.browser.switch_to.frame(iframe2)  # enter the work-order detail iframe
            time.sleep(sleepTime)
            text1 = irobot.getElementByCssSelecter('#firstChildTab .sn-tab-items').text

            if '评测结果' in text1:
                try:
                    irobot.getElementByCssSelecter('#firstChildTab .evaluateResult').click()
                    time.sleep(sleepTime / 2)
                    text2 = irobot.getElementByCssSelecter('.evaluateResult .serviceinfo').text
                    # str.replace returns a new string, so the result must be
                    # assigned back; the original discarded it and the commas
                    # were never replaced.
                    # NOTE(review): the original made two replace calls that
                    # look identical here; the second was presumably meant
                    # for the full-width comma, so both are handled -- confirm.
                    text2 = text2.replace(',', '#').replace('\uff0c', '#')
                    print(ida + ',是:有评测结果,' + text2)
                    print('-----------------------')
                    worksheet.write(currentRow, 0, ida)
                    worksheet.write(currentRow, 1, '是')
                    worksheet.write(currentRow, 2, text2)

                except Exception as e:
                    # NOTE: nothing is written for this id, but the row
                    # counter still advances below, leaving an empty row.
                    print(str(e))
                    print('点击评测结果或者获取评测结果失败')
                    print('-----------------------')
                    time.sleep(sleepTime)
            else:
                print(ida + ',否:无评测结果')
                worksheet.write(currentRow, 0, ida)
                worksheet.write(currentRow, 1, '否')
                worksheet.write(currentRow, 2, '无评测结果')
            time.sleep(sleepTime)
            irobot.browser.switch_to.default_content()
            # Click the second tab-header icon (presumably the close button
            # of the detail tab -- confirm against the page markup).
            irobot.getElementsByCssSelecter('.uiTabItemHead .uiTabItemInnerWrap i')[1].click()
            wb1.save(outexcelPath)
            time.sleep(sleepTime)
            currentRow += 1
        except Exception as e:
            # This id is skipped: nothing is saved and currentRow is not
            # advanced. The error detail is printed so failures can be traced.
            print('-->发生未知错误')
            print(str(e))
            try:
                # Best-effort recovery: return to the top-level document and
                # click the same tab-header icon as on the success path.
                irobot.browser.switch_to.default_content()
                irobot.getElementsByCssSelecter('.uiTabItemHead .uiTabItemInnerWrap i')[1].click()
            except Exception:
                print('-->界面操作恢复中,请打开浏览器窗口,再次尝试恢复')
                time.sleep(sleepTime)
    print('-->已完成全部查询')


评论