# Block for up to 120 seconds until an element with id "page-loader" is present in the DOM.
page_loader_locator = (By.ID, "page-loader")
WebDriverWait(driver, 120).until(
    EC.presence_of_element_located(page_loader_locator)
)
# Block for up to 60 seconds until the second cell of the 21st row of the
# "grid" table contains the text 'TEXT_MATCH'.
grid_cell_locator = (By.XPATH, '//*[@id="grid"]/div[2]/table/tbody/tr[21]/td[2]')
WebDriverWait(driver, 60).until(
    EC.text_to_be_present_in_element(grid_cell_locator, 'TEXT_MATCH')
)
# Block for up to 60 seconds until the element with id 'spinner-loading' is
# either invisible or no longer attached to the DOM.
spinner_locator = (By.ID, 'spinner-loading')
WebDriverWait(driver, 60).until(
    EC.invisibility_of_element_located(spinner_locator)
)
# Run the page's own getTranHisData() with the account and the from/to dates
# currently held in the form fields. The adjacent literals below concatenate
# to exactly the same script text as the original single string.
reload_history_js = (
    ' var fDate = $("#ctl00_Content_TransactionDetail_TxtFromDate").val();'
    ' var tDate = $("#ctl00_Content_TransactionDetail_TxtToDate").val();'
    ' getTranHisData($("#ctl00_Content_TransactionDetail_ListAccounts").val(), fDate, tDate); '
)
driver.execute_script(reload_history_js)
# Draw the captcha <img> onto a canvas, serialise the canvas with toDataURL()
# and append the result to the page inside a new <div id="c_detect">.
# The adjacent literals below concatenate to exactly the same script text as
# the original single string.
captcha_to_dom_js = (
    " var c = document.createElement('canvas');"
    " var img = document.getElementById('ctl00_Content_Login_Captcha_Captcha');"
    " c.height = img.naturalHeight;"
    " c.width = img.naturalWidth;"
    " var ctx = c.getContext('2d');"
    " ctx.drawImage(img, 0, 0, c.width, c.height);"
    " var base64String = c.toDataURL();"
    " var c_detect = document.createElement('div');"
    ' c_detect.setAttribute("id", "c_detect");'
    " c_detect.innerHTML = base64String;"
    " document.body.appendChild(c_detect); "
)
driver.execute_script(captcha_to_dom_js)
# Wait up to 60 seconds for the element with id
# "ctl00_mainContainer_ctl00_btnFilterStmt" to become clickable, then click it.
# The wait and the click are two separate statements; fused on one line they
# do not parse.
element = WebDriverWait(driver, 60).until(
    EC.element_to_be_clickable((By.XPATH, '//*[@id="ctl00_mainContainer_ctl00_btnFilterStmt"]'))
)
element.click()
Thư viện:
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from django.conf import settings


class Browser:
    """Build and own a headless Chrome WebDriver that downloads into a fixed directory.

    The driver is created eagerly in ``__init__``; use ``get_driver()`` to
    obtain it and ``destroy_driver()`` to shut it down.
    """

    # Directory handed to Chrome as its default download location.
    download_path = None
    # The selenium WebDriver instance, set by set_driver().
    driver = None

    def __init__(self, download_path):
        """Store *download_path* and start Chrome immediately."""
        self.download_path = download_path
        self.set_driver()

    def set_driver(self):
        """Create the Chrome driver and store it on ``self.driver``.

        The chromedriver binary is taken from
        ``settings.RESOURCE_PATH + '/chromedriver/chromedriver'``.
        """
        options = Options()
        options.add_argument('--no-sandbox')
        # The deprecated ``options.headless = True`` property is not used: it
        # only appended the same headless switch that is passed explicitly here.
        # NOTE(review): both the legacy and the "=new" headless switches are
        # passed; presumably only one is needed - confirm against the Chrome
        # version deployed.
        options.add_argument('headless')
        options.add_argument("--headless=new")
        options.add_argument('--disable-infobars')
        options.add_argument('--disable-dev-shm-usage')
        # if settings.APP_ENV == 'production':
        #     options.add_argument('--remote-debugging-port=9222')
        options.add_argument("--window-size=1920,1200")
        options.add_argument("--disable-notifications")
        options.add_argument('--verbose')
        options.add_experimental_option("prefs", {
            "download.default_directory": self.download_path,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing_for_trusted_sources_enabled": False,
            "safebrowsing.enabled": False,
        })
        options.add_argument('--disable-gpu')
        options.add_argument('--disable-software-rasterizer')
        # options.add_argument("user-data-dir=chrome-data")
        options.add_argument("start-maximized")  # open Browser in maximized mode
        options.add_argument("--disable-extensions")  # disabling extensions
        chrome_driver_path = settings.RESOURCE_PATH + '/chromedriver/chromedriver'
        # The ``executable_path`` keyword of webdriver.Chrome was deprecated in
        # Selenium 4 and later removed; the driver binary is passed through a
        # Service object instead.
        self.driver = webdriver.Chrome(
            service=Service(executable_path=chrome_driver_path),
            options=options,
        )

    def get_driver(self):
        """Return the WebDriver created by ``set_driver()``."""
        return self.driver

    def destroy_driver(self):
        """Quit the WebDriver if one was created."""
        if self.driver is not None:
            self.driver.quit()

    @staticmethod
    def click_by_xpath(driver, xpath):
        """Wait up to 60 seconds for *xpath* to be clickable on *driver*, then click it."""
        element = WebDriverWait(driver, 60).until(
            EC.element_to_be_clickable((By.XPATH, xpath))
        )
        element.click()

    # NOTE(review): this is declared static yet keeps a leading ``self``
    # parameter, which the body never uses. Callers therefore have to supply a
    # first positional argument themselves. The signature is left untouched so
    # existing call sites keep working.
    @staticmethod
    def download_excel(self, timeout=25, download_func=None):
        """Poll *download_func* once per second until it is truthy or *timeout* expires.

        Args:
            self: unused placeholder (see the note above the decorator).
            timeout: maximum number of one-second polling rounds.
            download_func: zero-argument callable reporting whether the file
                has arrived. When it is ``None`` the method simply sleeps for
                *timeout* seconds.

        Returns:
            The last value returned by *download_func*, or ``False`` when no
            callable was given or it was never invoked.
        """
        has_file_download = False
        seconds = 0
        # Truthiness is tested rather than ``== False`` so that a callback
        # returning None (or another falsy value) keeps polling instead of
        # ending the loop after its first check.
        while not has_file_download and seconds < timeout:
            time.sleep(1)
            if download_func is not None:
                has_file_download = download_func()
            seconds += 1
        return has_file_download
Ví dụ:
import logging
import datetime
import sys
import time
import hashlib
import traceback

from bank.services.captcha import Captcha
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from django.conf import settings
from django.core.management.base import BaseCommand
from bank.services.browser import Browser
from bank.services.helper import Helper

logger = logging.getLogger('django')


class Command(BaseCommand):
    """Django management command that logs in to the MBBank e-banking site
    and scrapes the transaction-history table of each configured account."""

    help = 'Crawl MBBank IB'
    bank_no = 'MBBANK'
    account_no_1 = '2341***9999'
    account_no_2 = '0680***6666'
    account_no_3 = '0928***8888'
    account_no_4 = '9501***9999'
    account_no_5 = '9065***1617'

    def __init__(self, *args, **kwargs):
        """Initialise the base command, then set the download directory."""
        # BaseCommand.__init__ sets up state the management framework relies
        # on (output wrappers, style); it must run before anything else.
        super().__init__(*args, **kwargs)
        self.download_path = settings.DATA_PATH + "/mb"

    def get_transactions(self, driver, account_no):
        """Scrape the rows of the transaction-history table for *account_no*.

        Returns:
            A list with one dict per table row (see ``_parse_transaction_row``).
        """
        transactions = []
        current_page = 1
        page = 1
        # NOTE(review): nothing in this method advances the pager, so the
        # loop ends as soon as ``page`` exceeds the active page number shown
        # in the DOM. If the pager starts on a page above 1, the same rows
        # are scraped more than once.
        while page < 100000000:
            time.sleep(5)
            pager = self.check_exists_by_xpath(driver, '//*[@id="page-items"]')
            if pager:
                current_page = int(pager.find_element(By.CLASS_NAME, 'active').text)
                print('page' + str(current_page))
            if page > current_page:
                break
            page += 1
            rows = driver.find_elements(By.XPATH, '//*[@id="tbl-transaction-history"]/tbody/tr')
            for row in rows:
                transaction = self._parse_transaction_row(row, account_no)
                print(transaction)
                # Collect every parsed row so the returned list is populated.
                transactions.append(transaction)
        return transactions

    @staticmethod
    def _parse_amount(text):
        """Convert a comma-grouped amount such as '1,200,000' to int; blank text yields 0."""
        digits = text.replace(',', '').strip()
        return int(digits) if digits else 0

    def _parse_transaction_row(self, row, account_no):
        """Build the transaction dict for one ``<tr>`` of the history table."""
        reference_id = row.find_element(By.XPATH, './/td[3]').text
        trans_time_o = row.find_element(By.XPATH, './/td[9]').text
        trans_time = datetime.datetime.strptime(
            trans_time_o, '%d/%m/%Y %H:%M:%S').strftime("%Y-%m-%d %H:%M")
        amount_out = self._parse_amount(row.find_element(By.XPATH, './/td[4]').text)
        amount_in = self._parse_amount(row.find_element(By.XPATH, './/td[5]').text)
        # The value from column 5 wins when it is non-zero, otherwise column 4.
        amount = amount_in or amount_out
        balance = row.find_element(By.XPATH, './/td[6]').text
        balance = balance.replace(' ', '').replace('VND', '').replace(',', '')
        content = row.find_element(By.XPATH, './/td[8]').text.strip()
        # The balance column is only kept for account_no_4; every other
        # account records 0.
        if account_no != self.account_no_4:
            balance = 0
        # The checksum input (fields and their order) must stay exactly as is,
        # otherwise previously computed checksums no longer match.
        checksum = hashlib.md5(
            (str(reference_id) + self.bank_no + account_no + str(amount)
             + str(balance) + content + trans_time_o).encode('utf-8')
        ).hexdigest()
        return {
            'checksum': checksum,
            'bank_no': self.bank_no,
            'reference_id': reference_id,
            'account_no': account_no,
            'content': content,
            'amount': amount,
            'balance': balance,
            'member_bank_no': None,
            'member_account_no': None,
            'member_account_name': None,
            'trans_time': trans_time,
        }

    def check_exists_by_xpath(self, driver, xpath):
        """Return the element matching *xpath*, or ``False`` when none exists."""
        try:
            return driver.find_element(By.XPATH, xpath)
        except NoSuchElementException:
            return False

    def getAccountSelect(self, account_no):
        """Return the XPath of the drop-down option for *account_no*, or "" if unknown."""
        # Position of each account inside the account drop-down overlay.
        option_index = {
            self.account_no_1: 8,
            self.account_no_2: 5,
            self.account_no_3: 7,
            self.account_no_4: 1,
            self.account_no_5: 9,
        }.get(account_no)
        if option_index is None:
            return ""
        return '//*[@class="cdk-overlay-pane"][1]/div/*[contains(@class,"mat-option")][%d]' % option_index

    def get_account(self, driver, account_no):
        """Open the transaction-inquiry page for *account_no*, run the query and scrape it.

        Any exception is printed and logged rather than propagated, so one
        failing account does not stop the remaining ones.

        Returns:
            The list produced by ``get_transactions``, or ``[]`` on failure.
        """
        try:
            logging.getLogger('mb').info("Get account "+account_no)
            transaction_url = "https://ebank.mbbank.com.vn/cp/account-info/transaction-inquiry?acctNo=" + account_no
            driver.get(transaction_url)
            # Wait until the first input has a non-empty value; bool() also
            # copes with get_attribute() returning None.
            WebDriverWait(driver, 10).until(
                lambda d: bool(d.find_element(By.XPATH, '//*[@id="mat-input-0"]').get_attribute("value")))
            WebDriverWait(driver, 60).until(
                EC.presence_of_element_located((By.ID, "btn-query"))
            )
            time.sleep(5)
            option_xpath = self.getAccountSelect(account_no)
            if not option_xpath:
                # Fail fast instead of handing an empty XPath to the driver.
                raise ValueError('No drop-down option configured for account ' + account_no)
            self.click_by_xpath(driver, '//*[@name="account-name"]')
            self.click_by_xpath(driver, option_xpath)
            driver.save_screenshot(settings.LOGS_PATH + '/screenshot-mb-'+account_no+'-3.png')
            # Zero-based index of today's day-of-month among the calendar cells.
            date_index = datetime.datetime.now().day - 1
            self.click_by_xpath(driver, '//*[@id="mat-radio-3"]/label/div[1]')
            time.sleep(5)
            driver.execute_script("document.getElementsByClassName('mat-datepicker-toggle')[0].click()")
            driver.execute_script("document.getElementsByClassName('mat-calendar-body-cell-content')["+str(date_index)+"].click();")
            driver.execute_script("document.getElementsByClassName('mat-card-custom')[0].getElementsByTagName('button')[3].click();")
            driver.find_element(By.XPATH, '//*[@id="btn-query"]').click()
            WebDriverWait(driver, 60).until(
                EC.presence_of_element_located((By.ID, "tbl-transaction-history"))
            )
            return self.get_transactions(driver, account_no)
        except Exception as e:
            print('get_account_1: ' + str(e))
            logger.error(traceback.format_exc())
            return []

    def login(self, driver):
        """Solve the captcha, fill in the login form and submit it.

        Returns:
            The same *driver*, once the URL has changed after submitting.

        Raises:
            RuntimeError: when the captcha service returns no text.
        """
        current_url = driver.current_url
        driver.save_screenshot(settings.LOGS_PATH + '/screenshot-mb.png')
        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.CLASS_NAME, "image-captcha-frame"))
        )
        captcha_base64 = driver.find_element(By.CSS_SELECTOR, '.image-captcha-frame > img').get_attribute('src')
        captcha_txt = Captcha().request(captcha_base64)
        print(captcha_txt)
        if not captcha_txt:
            # Abort here rather than quitting the driver and then continuing
            # to use it; the caller owns the driver and cleans it up.
            raise RuntimeError('MB captcha could not be solved')
        driver.find_element(By.XPATH, '//*[@id="corp-id"]').send_keys(settings.BANK['mb']['company_no'])
        driver.find_element(By.XPATH, '//*[@id="user-id"]').send_keys(settings.BANK['mb']['username'])
        driver.find_element(By.XPATH, '//*[@id="password"]').send_keys(settings.BANK['mb']['password'])
        driver.find_element(By.XPATH, '//*[@id="main-content"]/mbb-welcome/div/div/div[2]/div[2]/div/mbb-login/form/div/div[2]/mbb-word-captcha/div/div[2]/div/input').send_keys(captcha_txt)
        driver.save_screenshot(settings.LOGS_PATH + '/screenshot-mb-login.png')
        driver.find_element(By.XPATH, '//*[@id="login-btn"]').click()
        driver.save_screenshot(settings.LOGS_PATH + '/screenshot-mb-login1.png')
        WebDriverWait(driver, 30).until(EC.url_changes(current_url))
        driver.save_screenshot(settings.LOGS_PATH + '/screenshot-mb-login2.png')
        return driver

    def crawl(self):
        """Start a browser, log in, visit every configured account, then shut down."""
        browser = Browser(self.download_path)
        driver = browser.get_driver()
        try:
            driver.get("https://ebank.mbbank.com.vn/cp/pl/login")
            self.login(driver)
            for account_no in (self.account_no_4, self.account_no_1, self.account_no_2,
                               self.account_no_3, self.account_no_5):
                self.get_account(driver, account_no)
        except Exception as e:
            print('MB Failed Exception: ' + str(e))
            logger.error(traceback.format_exc())
        finally:
            # Always release the browser, including on SystemExit or
            # KeyboardInterrupt.
            self.quit(driver)

    def click_by_xpath(self, driver, xpath):
        """Wait for *xpath* to be clickable and click it (delegates to ``Browser``)."""
        Browser.click_by_xpath(driver, xpath)

    def handle(self, *args, **kwargs):
        """Entry point of the command: exit if another crawler is running, else crawl."""
        # presumably counts running processes whose command line matches
        # 'mb_v2' - verify against Helper.count_process_exists.
        if Helper.count_process_exists('mb_v2') > 1:
            logging.getLogger('mb').info("Exit by process crawler runing...")
            print("Exit by process crawler runing...")
            sys.exit()
        self.crawl()

    def quit(self, driver):
        """Close the current window and end the WebDriver session."""
        time.sleep(2)
        try:
            driver.close()
        except Exception:
            # A failed close() must not prevent quit() from running.
            logger.error(traceback.format_exc())
        finally:
            driver.quit()