微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

无头Chrome浏览器,在下载/转换为pdf时为文件提供特定名称

如何解决无头Chrome浏览器,在下载/转换为pdf时为文件提供特定名称

我正在使用无头 Chrome(headless Chrome)将网页转换为 PDF。转换本身可以完成,但我想为下载(转换为 PDF)的文件指定特定的文件名,因为我要通过 URL 一次性转换 100 多个 PDF 文件。以下是我的代码。我已经尝试过用 shutil 重命名,但之前的文件会被删除,只剩下最新的一个文件,而且该文件无法打开。

import os
import time
import json
from selenium import webdriver

# Python 2 exposes these helpers as `urlparse` / `urllib.urlencode`; Python 3
# moved them into `urllib.parse`. Only an ImportError triggers the fallback,
# so any unrelated failure is no longer silently swallowed.
try:
    import urlparse
    from urllib import urlencode
except ImportError:  # Python 3
    import urllib.parse as urlparse
    from urllib.parse import urlencode

# Remove a leftover ".DS_Store" from the current working directory, if any.
_ds_store = ".DS_Store"
if os.path.exists(_ds_store):
    os.remove(_ds_store)


# Print-preview state whose selected destination is "Save as PDF"; it is
# serialised to JSON and handed to Chrome as a preference.
_pdf_destination = {"id": "Save as PDF", "origin": "local"}
appState = {
    "recentDestinations": [_pdf_destination],
    "selectedDestinationId": "Save as PDF",
    "version": 2,
}

profile = {
    'printing.print_preview_sticky_settings.appState': json.dumps(appState),
}

# Path where browser save files
download_path = r'/Users/expether/Desktop/Diagence/Task 1'
# Path where to move file
new_path = download_path + r'/mkpdf'


# Query parameters sent with every request.
photoid, seaid_destination = 3, 2

# Page names appended to the server's base URL, one request per entry.
searchs = [
    "merge_recog",
    "shape_recog",
    "pattern_recog",
]


# For every page name in `searchs`: build the request URL, open it in a
# freshly started Chrome, then rename the newest entry of `download_path`
# to `file_name`. A new browser is launched and quit on each iteration.
for search in searchs:
    
    url = "http://0.0.0.0:8080/" + search + "?"
    params = {'photoid':photoid,'seaid_destination':seaid_destination}
    
    # Target name for the PDF, e.g. "merge_recog3_2.pdf".
    file_name = search + str(photoid) + "_" + str(seaid_destination) + ".pdf"
    
    # Merge `params` into the (empty) query string of `url`.
    url_parse = urlparse.urlparse(url)
    query = url_parse.query            #retrieve query item
    url_dict = dict(urlparse.parse_qsl(query))     #convert the list to dict
    url_dict.update(params)                  #add params to dict
    url_new_query = urlencode(url_dict)            #convert it in "percent-encoded"
    url_parse = url_parse._replace(query=url_new_query)   #replace old query with new query
    new_url = urlparse.urlunparse(url_parse)        #construct new url
    print(new_url)
    
    # Configure Chrome Canary with the "Save as PDF" print preferences and
    # the --kiosk-printing switch, then start it through chromedriver.
    chrome_options = webdriver.ChromeOptions()
    chrome_options.binary_location = '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary'
    chrome_options.add_experimental_option('prefs',profile)
    chrome_options.add_argument('--kiosk-printing')
    CHROMEDRIVER_PATH = '/Users/expether/Downloads/chromedriver'
    driver = webdriver.Chrome(options=chrome_options,executable_path=CHROMEDRIVER_PATH)
    
    driver.get(new_url)
    
    import shutil
    # Pick whichever entry of download_path has the greatest getctime value
    # (any entry, not only PDFs) and rename it inside download_path itself;
    # new_path is not used in this loop.
    filename = max([download_path + "/" + f for f in os.listdir(download_path)],key=os.path.getctime)
    shutil.move(filename,os.path.join(download_path,file_name))
    # NOTE(review): the print call below is commented out, so nothing in this
    # loop triggers printing; presumably no new PDF exists when the newest
    # entry is picked above - confirm.
    # driver.execute_script('window.print();')
    driver.quit()

解决方法

可以正常工作的代码:

from selenium import webdriver
import json
import os
import shutil
import time

# Python 2 exposes these helpers as `urlparse` / `urllib.urlencode`; Python 3
# moved them into `urllib.parse`. Only an ImportError triggers the fallback,
# so any unrelated failure is no longer silently swallowed.
try:
    import urlparse
    from urllib import urlencode
except ImportError:  # Python 3
    import urllib.parse as urlparse
    from urllib.parse import urlencode

# Remove a leftover ".DS_Store" from the current working directory, if any.
_ds_store = ".DS_Store"
if os.path.exists(_ds_store):
    os.remove(_ds_store)


# Directory handed to Chrome as its default save location.
downloadPath = '/Users/expether/Desktop/Diagence/Task 1'

# Author's note on valid id combinations:
#   seaid = 4 -> photoid = 1~8
#   seaid = 2 -> photoid = 1~77
photoid, seaid_destination = 3, 2

# Page names appended to the server's base URL, one request per entry.
searchs = [
    "merge_recog",
    "shape_recog",
    "pattern_recog",
]

# Print-preview state whose selected destination is "Save as PDF".
_pdf_destination = {"id": "Save as PDF", "origin": "local", "account": ""}
settings = {
    "recentDestinations": [_pdf_destination],
    "selectedDestinationId": "Save as PDF",
    "version": 2,
}

# Chrome preferences: the serialised print-preview state plus the directory
# used as the default save location.
prefs = {
    'printing.print_preview_sticky_settings.appState': json.dumps(settings),
    'savefile.default_directory': downloadPath,
}

CHROMEDRIVER_PATH = '/Users/expether/Downloads/chromedriver'

# One Chrome Canary instance, started once and shared by every page below.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--kiosk-printing')
chrome_options.add_experimental_option('prefs', prefs)
chrome_options.binary_location = '/Applications/Google Chrome Canary.app/Contents/MacOS/Google Chrome Canary'
driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH, options=chrome_options)

def _pdf_state(directory):
    """Map each regular ``*.pdf`` file in *directory* to ``(ctime, size)``.

    Sub-directories (such as the ``mkpdf`` target folder) and non-PDF entries
    are ignored so they can never be mistaken for a freshly printed file.
    """
    state = {}
    for entry in os.listdir(directory):
        if not entry.lower().endswith(".pdf"):
            continue
        path = os.path.join(directory, entry)
        try:
            if os.path.isfile(path):
                state[path] = (os.path.getctime(path), os.path.getsize(path))
        except OSError:
            # The entry vanished between listing and stat; skip it.
            continue
    return state


def _wait_for_new_pdf(directory, before, timeout=30.0, poll=0.5):
    """Return the path of a PDF that appeared or changed since *before*.

    *before* is an earlier ``_pdf_state(directory)`` snapshot. A file counts
    as ready once it is non-empty and its ``(ctime, size)`` pair is identical
    in two consecutive polls, which avoids moving a file that is presumably
    still being written by the browser.

    Raises:
        RuntimeError: no new PDF became ready within *timeout* seconds.
    """
    deadline = time.time() + timeout
    previous = {}
    while True:
        current = _pdf_state(directory)
        ready = [
            path for path, sig in current.items()
            if before.get(path) != sig and previous.get(path) == sig and sig[1] > 0
        ]
        if ready:
            return max(ready, key=lambda path: current[path][0])
        if time.time() >= deadline:
            raise RuntimeError(
                "no new PDF appeared in %r within %s seconds" % (directory, timeout)
            )
        previous = current
        time.sleep(poll)


# Destination folder for the renamed PDFs; create it up front so the move
# below cannot fail on a missing directory.
dirpath = '/Users/expether/Desktop/Diagence/Task 1/mkpdf'
if not os.path.isdir(dirpath):
    os.makedirs(dirpath)

# The query parameters are the same for every page, so build them once.
params = {'photoid': photoid, 'seaid_destination': seaid_destination}

try:
    for search in searchs:
        # The base URL carries no query of its own, so appending the encoded
        # parameters yields the same URL as a parse/update/unparse round-trip.
        new_url = "http://0.0.0.0:8080/" + search + "?" + urlencode(params)
        print(new_url)

        # Snapshot the download folder first so the file produced by this
        # page can be told apart from anything that was already there.
        before = _pdf_state(downloadPath)

        driver.get(new_url)
        driver.execute_script('window.print();')

        newfilename = search + str(photoid) + "_" + str(seaid_destination) + ".pdf"
        filename = _wait_for_new_pdf(downloadPath, before)
        shutil.move(filename, os.path.join(dirpath, newfilename))
finally:
    # Always shut the browser down, even if a page fails to convert.
    driver.quit()

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。