Tuesday, May 4, 2021

ConnectionRefusedError: [WinError 10061] - cannot transfer a pandas DataFrame to SQL using Python. Pulling my hair out at the moment

Apologies for the long code. I'm basically doing a web crawl and writing the data to an Excel sheet. That part has been working, but when I try to push the same data into MySQL (MariaDB) it fails with a "connection refused" error. I have checked for typos and any other mistakes, but when running the debugger it always errors out on the same line. I need a helping hand or a hint. Thank you.
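To make clear what I am aiming for, the SQL step on its own boils down to something like this (a stripped-down sketch, with a dummy DataFrame standing in for the real crawl result and the same credentials as in the script below; I've left out the stray } after the database name just to keep the example clean):

import pandas as pd
import pymysql
from sqlalchemy import create_engine

# let the mysql+mysqldb dialect use PyMySQL, same as in the full script
pymysql.install_as_MySQLdb()

# dummy frame standing in for the real crawl result
df = pd.DataFrame({"Prefecture": ["東京都"], "Title": ["test"]})

# same credentials and port as the failing line
engine = create_engine("mysql+mysqldb://root:abcdefgH1@localhost:3306/stamprallydb")

# append the rows into the target table
df.to_sql(name="stamprallydb_crawl_result", con=engine, if_exists="append", index=False)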

The SQL database settings are shown in the attached screenshot (SQL setting).

It errors out on the line below:

engine = create_engine(
    "mysql+mysqldb://root:abcdefgH1@localhost:3306/stamprallydb}", encoding='utf-8')

I have also tried:

engine = create_engine(
    "mysql+mysqldb://root:abcdefgH1@localhost:50571/stamprallydb}", encoding='utf-8')
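To rule out pandas and SQLAlchemy altogether, a bare connection test along these lines (just a sketch reusing the same credentials; the port can be swapped for 50571 to test the other setting) should show whether anything is listening on the port at all:

import pymysql

# minimal connectivity check, independent of SQLAlchemy and pandas
conn = pymysql.connect(
    host="localhost",
    port=3306,            # or 50571, the other port I tried
    user="root",
    password="abcdefgH1",
    database="stamprallydb",
)
print(conn.get_server_info())
conn.close()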

It would save time to read from the try: part of the code at the bottom.

Error in console

ConnectionRefusedError: [WinError 10061]

My full code:

# This Python file uses the following encoding: utf-8
# import modules
# pip install selenium
# pip install bs4
# pip install pymysql
# 254 results took 30 min. 1 post per 7 seconds
import pymysql
from sqlalchemy import create_engine
pymysql.install_as_MySQLdb()
import MySQLdb

import re
import pandas as pd
import numpy as np
import urllib.request
import math
import time
import sys
import os
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium import webdriver as wd
from Stamprally import StamprallyInfo

# Start Recording Program Loading Time
programStart = time.time()
# =============================================================================================
# prefectureNameList = ["海外", "北海道地方", "北海道", "東北地方", "青森県", "岩手県", "宮城県", "秋田県", "山形県", "福島県", "関東地方", "茨城県", "栃木県", "群馬県", "埼玉県", "千葉県", "東京都", "神奈川県", "中部地方", "新潟県", "富山県", "石川県", "福井県", "山梨県", "長野県", "岐阜県", "静岡県", "愛知県", "三重県", "近畿地方", "滋賀県", "京都府", "大阪府", "兵庫県", "奈良県", "和歌山県", "中国地方", "鳥取県", "島根県", "岡山県", "広島県", "山口県", "四国地方", "徳島県", "香川県", "愛媛県", "高知県", "九州・沖縄地方", "福岡県", "佐賀県", "長崎県", "熊本県", "大分県", "宮崎県", "鹿児島県", "沖縄県"]
# =============================================================================================

prefectureNameList = ["北海道", "青森県", "岩手県", "宮城県", "秋田県", "山形県", "福島県", "茨城県", "栃木県", "群馬県", "埼玉県", "千葉県", "東京都", "神奈川県", "新潟県", "富山県", "石川県", "福井県", "山梨県", "長野県", "岐阜県", "静岡県",
                      "愛知県", "三重県", "滋賀県", "京都府", "大阪府", "兵庫県", "奈良県", "和歌山県", "鳥取県", "島根県", "岡山県", "広島県", "山口県", "徳島県", "香川県", "愛媛県", "高知県", "福岡県", "佐賀県", "長崎県", "熊本県", "大分県", "宮崎県", "鹿児島県", "沖縄県"]

# prefectureNameList = ["東京都"]

data = []
timeStampData = []

# timeStampDataAggregate = []
contentAggregator = []
timeStampData.append("프로그램 시작")
timeStampData.append(programStart)
# db = Db()
main_url = 'https://stamprally.org/'
# list that holds the stamp rally info
# stamprally_list = []
# load the driver
# For MAC users: driver = wd.Chrome(executable_path='./chromedriver')
# End Recording Program Loading Time
programEnd = time.time()
timeStampData.append(programEnd - programStart)
# timeStampDataAggregate.append(timeStampData)

timeStamp = pd.DataFrame(np.array([timeStampData]), columns=[
                         '설명', 'TimeStamp', '소요기간'])

# tmp_res = [[]]
# tmp_res = [][]
# timeStamp = pd.DataFrame(tmp_res[0].append(timeStampData), columns=[
#                          '설명', 'TimeStamp', '소요기간'])

print(timeStamp)

timeStampData.clear()
print(timeStampData)
# Start Recording Selenium Driver Loading
print("셀레니엄 ")
timeStampData.append("셀레니엄 드라이버 로딩")
seleniumStart = time.time()
timeStampData.append(seleniumStart)
driver = wd.Chrome(executable_path='chromedriver.exe')
driver.get(main_url)
seleniumEnd = time.time()
timeStampData.append(seleniumEnd - seleniumStart)
print(timeStampData)
rowAddTimeStampSelenium = pd.Series(timeStampData, index=timeStamp.columns)
timeStamp = timeStamp.append(rowAddTimeStampSelenium, ignore_index=True)
# timeStampDataAggregate.append(timeStampData)
timeStampData.clear()

# wait = WebDriverWait(driver, 10)
# searchCat1Options = wait.until(EC.presence_of_all_elements_located(
#     (By.CSS_SELECTOR, "#header_search_cat1>:is(option.level-1 ,option.level-0)")))

# prefectureValueStorage = driver.find_element_by_xpath(
#     "//*[@id='header_search_cat1']/option[2]").get_attribute('value')

# prefectureValueStorage = [x.get_attribute('value') for x in driver.find_elements_by_xpath(
#     "//select[@name='search_cat1']/option[@class='level-1' or @class='level-0']")]

# prefectureValueStorage = [81]
# =============================================================================================
prefectureValueStorage = [x.get_attribute('value') for x in driver.find_elements_by_xpath(
    "//select[@name='search_cat1']/option[@class='level-1']")]
# =============================================================================================
prefectureNameIterator = -1
print("START OF PREFECTURE " + prefectureNameList[prefectureNameIterator])
# Loop through all the different prefectures
for prefectureValue in prefectureValueStorage:
    print("Prefecture Start Time")
    prefectureStart = time.time()
    prefectureNameIterator += 1
    # Add Prefecture Name to timeStampData
    timeStampData.append(prefectureNameList[prefectureNameIterator])
    timeStampData.append(prefectureStart)
    print(timeStampData)
    driver.get(
        f"https://stamprally.org/?search_keywords&search_keywords_operator=and&search_cat1={prefectureValue}&search_cat2=0")
    # Calculate How Many Times To Run Page Loop
    imageDownloadCounter = 1
    totalList = driver.find_element_by_css_selector(
        'div.page_navi2.clearfix>p').text  # get_attribute('text')
    totalListNum = totalList.split("件中")
    # Add TotalListNum to the contentAggregator
    # contentAggregator.append(int(totalListNum[0]))
    if int(totalListNum[0]) % 10 != 0:
        pageLoopCount = math.ceil((int(totalListNum[0])/10))
    else:
        pageLoopCount = int(totalListNum[0])/10
    # continue
    currentpage = 0
    while currentpage < pageLoopCount:
        currentpage += 1
        print("Current Page " + str(currentpage))
        # =====================================================================================
        # Loop through all the listings within the prefecture page
        driver.get(
            f"https://stamprally.org/?search_keywords&search_keywords_operator=and&search_cat1={prefectureValue}&search_cat2=0&paged={currentpage}")
        # print("Loading Page %s" % currentpage)
        # =====================================================================================
        # Add prefectureName to the contentAggregator
        # contentAggregator.append(prefectureNameList[prefectureNameIterator])
        # Gather All List Links
        urlList = []
        currentUrlCounter = 0
        listURLContainer = driver.find_elements_by_css_selector(
            '#post_list2 > li > a')
        # Put all the lists in one array
        for url in listURLContainer:
            urlList.append(url.get_attribute('href'))
        # Loop through all the links
        for listURL in listURLContainer:
            contentAggregator = []
            # Add TotalListNum to the contentAggregator
            contentAggregator.append(int(totalListNum[0]))
            # Add prefectureName to the contentAggregator
            contentAggregator.append(prefectureNameList[prefectureNameIterator])
            # print('article Link: ')
            # print(urlList[currentUrlCounter])
            # Add listLink to the contentAggregator
            contentAggregator.append(urlList[currentUrlCounter])
            # for each link in listURLContainer:
            driver.get(urlList[currentUrlCounter])
            currentUrlCounter += 1
            locationTag = [x.get_attribute('title') for x in driver.find_elements_by_xpath(
                "//*[@id='post_meta_top']/li[1]/a[@class='cat-category']")]
            # print(locationTag)
            # Add locationTag to the contentAggregator
            contentAggregator.append(locationTag)

            eventTag = [x.get_attribute('title') for x in driver.find_elements_by_xpath(
                "//*[@id='post_meta_top']/li[2]/a[@class='cat-category2']")]
            # Add eventTag to the contentAggregator
            contentAggregator.append(eventTag)

            # Select post date & remove non-text variable
            availablePeriod = (driver.find_element_by_css_selector(
                'div#post_date')).text.split("( ")
            availablePeriodFormatted = availablePeriod[0].replace("開催期間:", "")
            availableStartDate = availablePeriod[0].split(" ~ ")
            endDate = availableStartDate[1]
            availableStartDateFormatted = availableStartDate[0].replace("開催期間:", "")
            # Select latest update date
            lastUpdatedDate = driver.find_element_by_css_selector(
                'time.entry-date.updated').text
            # print("Available Period:")
            # print(availablePeriodFormatted)
            # Add Available Period to the contentAggregator
            contentAggregator.append(availablePeriodFormatted)
            # print("Available StartDate:")
            # print(availableStartDateFormatted)
            # Add Available StartDate to the contentAggregator
            contentAggregator.append(availableStartDateFormatted)
            # print("End Date: ")
            # print(endDate)
            # Add endDate to the contentAggregator
            contentAggregator.append(endDate)
            # print("Last Updated:")
            # print(lastUpdatedDate[6:])
            # Add lastUpdatedDate to the contentAggregator
            contentAggregator.append(lastUpdatedDate[6:])
            # =====================================================================================
            # Download Main Post Image
            mainImageUrl = driver.find_element_by_css_selector(
                'img.attachment-post-thumbnail.size-post-thumbnail.wp-post-image').get_attribute('src')
            # Add mainImageUrl to the contentAggregator
            contentAggregator.append(mainImageUrl)
            # Save Post Main Title
            # postTitle = driver.find_element_by_xpath(
            #     "//*[@id='post_title']/text()")
            postTitle1 = driver.find_element_by_css_selector(
                'h2#post_title').text.replace("開催終了", "")
            postTitle = postTitle1.replace("ただいま開催中", "")
            # Replace all special characters in the string with an empty string
            # Pass the string in search
            # specialCharacter = "!@#$%^&*()-+?_=,<>/"
            # if any(character in specialCharacter for character in postTitle):
            removeSpecialChars = postTitle.translate(
                {ord(c): " " for c in "!@#$%^&*()[]{};:,./<>?\|`~-=_+"})
            postTitle = removeSpecialChars
            print(postTitle)
            # else:
            #     continue

            # Add Title to the contentAggregator
            contentAggregator.append(postTitle)
            # print("Title: ")
            # print(postTitle)
            # Find out event validity status
            eventValidity = driver.find_element_by_xpath(
                "//*[@id='post_title']/span").text
            # Add eventValidity to the contentAggregator
            contentAggregator.append(eventValidity)

            # Save Post Main Image
            urllib.request.urlretrieve(mainImageUrl, (str(
                prefectureNameList[prefectureNameIterator])+postTitle+str(imageDownloadCounter) + ".png"))
            imageDownloadCounter += 1
            # Get Inner Website Link
            innerWebSiteButtonURL = driver.find_element_by_css_selector(
                'div.post_content.clearfix > div >a').get_attribute('href')
            # print("inner Website Button URL: " + innerWebSiteButtonURL)
            # Add innerWebSiteURL to the contentAggregator
            contentAggregator.append(innerWebSiteButtonURL)
            # Gather Main Post Text Content
            mainText = driver.find_elements_by_css_selector(
                'div.post_content.clearfix > p')
            mainContentText = []
            # Remove disclaimer text
            for mainContentDetail in mainText:
                mainContentText.append(mainContentDetail.text)
            mainContextTextCount = len(mainContentText)-1
            # print(mainContentText[:mainContextTextCount])
            # Add Main Post Text Content to the contentAggregator
            contentAggregator.append(mainContentText[:mainContextTextCount])

            # =====================================================================================
            contentReorder = [1, 0, 10, 11, 5, 6, 7, 8, 13, 3, 4, 9, 12, 2]
            contentAggregator = [contentAggregator[i] for i in contentReorder]
            print("=====================================================================================")
            # print(contentAggregator)
            data.append(contentAggregator)
            # print(data)
            # print(pd.DataFrame(data, columns=["Total List Number", "Prefecture", "ListLink", "Location Tag", "Event Tag", "Available Period",
            #                                   "Available StartDate", "End Date", "Last Updated", "Main Image URL", "Title", "Event Validity", "innerWebSiteURL", "mainText"]))
            df = pd.DataFrame(data, columns=["Prefecture", "Total List Number", "Title", "Event Validity", "Available Period", "Available StartDate",
                                             "End Date", "Last Updated", "mainText", "Location Tag", "Event Tag", "Main Image URL", "innerWebSiteURL", "ListLink"])

            try:
                engine = create_engine(
                    "mysql+mysqldb://root:abcdefgH1@localhost:3306/stamprallydb}", encoding='utf-8')
                conn = engine.connect()
                df.to_sql(name='stamprallydb_crawl_result',
                          con=engine, if_exists='append', index=False)
            except:
                print("Error in Mysql connection")
    else:
        prefectureEnd = time.time()
        timeStampData.append(prefectureEnd-prefectureStart)
        rowAddTimeStampPrefecture = pd.Series(
            timeStampData, index=timeStamp.columns)
        timeStamp = timeStamp.append(
            rowAddTimeStampPrefecture, ignore_index=True)
        timeStampData.clear()
        # timeStampDataAggregate.append(timeStampData)

excelFileStart = time.time()
xlwriter = pd.ExcelWriter('StampRally_Crawler.xlsx')
df.to_excel(xlwriter, sheet_name="Stamprally.org Crawl Result")
# Add last Series "엑셀 파일 저장"
excelFileEnd = time.time()
timeStampData.append("엑셀 파일 저장")
timeStampData.append(excelFileStart)
timeStampData.append(excelFileEnd-excelFileStart)
rowAddTimeStampPrefecture = pd.Series(timeStampData, index=timeStamp.columns)
timeStamp = timeStamp.append(rowAddTimeStampPrefecture, ignore_index=True)
# timeStampDataAggregate.append(timeStampData)
# Create new sheet and write to Excel
timeStamp.to_excel(xlwriter, sheet_name="TimeStamp Result")

xlwriter.close()

# dictionary = {'Prefecture': ['Jai', 'Princi', 'Gaurav', 'Anuj'],
#               'Title': [5.1, 6.2, 5.1, 5.2],
#               'AvailablePeriod': ['Msc', 'MA', 'Msc', 'Msc'],
#               'AvailableDate': ['Msc', 'MA', 'Msc', 'Msc'],
#               'AvailableEndDate': ['Msc', 'MA', 'Msc', 'Msc'],
#               'LastUpdateDate': ['Msc', 'MA', 'Msc', 'Msc'],
#               'Maintext': ['Msc', 'MA', 'Msc', 'Msc'],
#               'PostLink': ['Msc', 'MA', 'Msc', 'Msc'],
#               'InnerWebsiteLink': ['Msc', 'MA', 'Msc', 'Msc'],
#               'LocationTag': ['Msc', 'MA', 'Msc', 'Msc'],
#               'EventTag': ['Msc', 'MA', 'Msc', 'Msc']
#               }
# =====================================================================================
# Close off
driver.close()
driver.quit()
sys.exit()
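One more thing I should mention: the bare except: in the script only prints a generic message, so a variant of the same try block like this (just a sketch; only the except part changes) would at least surface the full traceback instead of hiding it:

import traceback

try:
    engine = create_engine(
        "mysql+mysqldb://root:abcdefgH1@localhost:3306/stamprallydb}", encoding='utf-8')
    conn = engine.connect()
    df.to_sql(name='stamprallydb_crawl_result',
              con=engine, if_exists='append', index=False)
except Exception:
    # print the real exception and traceback instead of a one-line message
    traceback.print_exc()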
Source: https://stackoverflow.com/questions/67393862/connectionrefusederror-winerror-10061-cannot-transfer-to-sql-from-dataframe (May 05, 2021, 08:48 AM)
