微信公众号搜"智元新知"关注
微信扫一扫可直接关注哦!

如何使用python IMAP提取多部分电子邮件的正文并保存附件?

问题:如何使用 Python IMAP 提取多部分电子邮件的正文并保存附件?

我正在做一个项目,需要接收带有特定“主题”的电子邮件,这些邮件是由用户转发给我的。正文文本位于被转发的原始邮件中,转发行上方没有输入任何新文本。原始邮件和转发邮件中都可能带有附件。

我使用 Python 和 IMAP 编写了以下代码,但它只有在邮件是新邮件(而不是转发邮件)时才能保存附件和正文。

def getAllEmails(username,password,subject,fromEmail,folderName):
    """Download matching e-mails and save their attachments and text body.

    Connects to ``imap.outlook.com`` over SSL, searches the inbox for
    messages whose subject contains *subject* and whose sender matches
    *fromEmail*, and for every hit:

    * saves each attachment whose MIME type maps to .pdf/.csv/.png/.docx/.xlsx
      under ``<cwd>/<folderName>/<subject>/``;
    * writes the To/From/Date/Subject headers followed by every non-attachment
      ``text/plain`` part (including those inside a forwarded message) to
      ``<cwd>/<subject>/textfile.txt``.

    Args:
        username: IMAP login name.
        password: IMAP password.
        subject: Text the message subject must contain (ASCII only, because
            the search is issued without a charset).
        fromEmail: Text the sender address must contain.
        folderName: Directory (relative to the current working directory)
            that receives the attachments.

    Raises:
        imaplib.IMAP4.error: If login fails or the inbox cannot be selected.
    """
    # Several criteria in one SEARCH are ANDed by the server, so a single
    # round trip replaces the two searches plus a client-side intersection.
    criteria = ('SUBJECT', _imap_quote(subject), 'FROM', _imap_quote(fromEmail))

    mail = imaplib.IMAP4_SSL("imap.outlook.com")
    try:
        mail.login(username, password)
        print("Login success..........")

        status, _ = mail.select("inBox")
        if status != 'OK':
            raise imaplib.IMAP4.error("could not select the inbox")
        try:
            status, data = mail.search(None, *criteria)
            found = status == 'OK' and data and data[0]
            message_ids = data[0].split() if found else []

            for message_id in message_ids:
                status, email_data = mail.fetch(message_id, '(RFC822)')
                # A message that vanished between SEARCH and FETCH yields
                # no (envelope, payload) tuple; skip it.
                if status != 'OK' or not email_data:
                    continue
                if not isinstance(email_data[0], tuple):
                    continue
                # Parse the raw bytes directly: decoding the whole message
                # as UTF-8 first fails on mail in any other 8-bit encoding.
                message = email.message_from_bytes(email_data[0][1])
                _save_email(message, folderName)
        finally:
            # CLOSE is only legal while a mailbox is selected.
            mail.close()
    finally:
        mail.logout()


def _imap_quote(value):
    """Return *value* as an IMAP quoted string with backslash and quote escaped."""
    escaped = str(value).replace('\\', '\\\\').replace('"', '\\"')
    return '"{}"'.format(escaped)


def _header_text(value):
    """Decode an RFC 2047 header to a single line of text ('' when missing)."""
    from email.header import decode_header, make_header

    if value is None:
        return ""
    try:
        text = str(make_header(decode_header(value)))
    except (LookupError, UnicodeError):
        # Unknown or mislabelled charset: fall back to the raw header.
        text = str(value)
    # Folded headers may still contain line breaks; collapse them.
    return " ".join(text.split())


def _safe_name(name, fallback):
    """Turn untrusted text into one path component valid on all platforms."""
    import re

    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', name)
    # Also neutralises "." and "..", which reduce to the empty string here.
    cleaned = cleaned.strip().rstrip('. ')
    return cleaned or fallback


def _save_email(message, folderName):
    """Write one parsed message's attachments and plain-text body to disk."""
    subject_ = _header_text(message['Subject'])
    dir_name = _safe_name(subject_, "no_subject")
    header_lines = [
        "to: " + _header_text(message['To']) + "\n",
        "from: " + _header_text(message['From']) + "\n",
        "date: " + _header_text(message['date']) + "\n",
        "subject: " + subject_ + "\n",
    ]

    # Only attachments whose declared MIME type maps to one of these
    # extensions are stored.
    allowed_extensions = ('.pdf', '.csv', '.png', '.docx', '.xlsx')
    bodies = []

    # walk() also descends into message/rfc822 parts, i.e. into e-mails
    # that were forwarded as an attachment.
    for part in message.walk():
        # Containers (multipart/*, message/rfc822) carry no payload of
        # their own; their children are visited separately.
        if part.is_multipart():
            continue
        payload = part.get_payload(decode=True)
        if payload is None:
            continue

        content_type = part.get_content_type()
        content_disposition = str(part.get("Content-Disposition")).lower()
        filename = part.get_filename()

        if filename and mimetypes.guess_extension(content_type) in allowed_extensions:
            save_path = os.path.join(os.getcwd(), folderName, dir_name)
            os.makedirs(save_path, exist_ok=True)
            # The filename comes from the sender; never let it escape save_path.
            safe_filename = _safe_name(_header_text(filename), "attachment")
            with open(os.path.join(save_path, safe_filename), 'wb') as fp:
                fp.write(payload)

        if content_type == "text/plain" and "attachment" not in content_disposition:
            charset = part.get_content_charset() or "utf-8"
            try:
                bodies.append(payload.decode(charset, errors="replace"))
            except LookupError:
                # Charset name unknown to Python.
                bodies.append(payload.decode("utf-8", errors="replace"))

    if not bodies:
        return

    # NOTE(review): the text file is written next to, not inside, folderName,
    # as in the original code -- confirm this is intended. Messages sharing a
    # subject overwrite each other's textfile.txt.
    save_path = os.path.join(os.getcwd(), dir_name)
    os.makedirs(save_path, exist_ok=True)
    with open(os.path.join(save_path, "textfile.txt"), 'w', encoding='utf-8') as fp:
        fp.write("".join(header_lines))
        fp.write("\n".join(bodies))

我希望即使是转发的电子邮件,也能保存正文和附件,应该怎么做?

解决方法

看起来您已经完成了提取附件的部分。请尝试使用以下代码获取多部分电子邮件的正文。

您可能需要自行将您已有的代码与下面这段代码合并。

def getAll(username,password,folderName,fromEmail="user@gmail.com",subject="Subject-Name",defaultEncoding="iso-2022-jp"):
    """Print subject, sender and text body of every matching inbox message.

    Connects to ``imap.outlook.com`` over SSL, searches INBOX for messages
    matching both *fromEmail* and *subject*, and prints the decoded Subject
    and From headers and the message body.

    Args:
        username: IMAP login name.
        password: IMAP password.
        folderName: Unused; kept so existing callers keep working.
        fromEmail: Text the sender address must contain.
        subject: Text the subject must contain (ASCII only, because the
            search is issued without a charset).
        defaultEncoding: Charset used for text parts that declare none or
            declare one Python does not know.

    Raises:
        imaplib.IMAP4.error: If login fails or INBOX cannot be selected.
    """
    def quote(value):
        # IMAP quoted strings need backslash and double quote escaped.
        return str(value).replace('\\', '\\\\').replace('"', '\\"')

    query = '(FROM "{}" SUBJECT "{}")'.format(quote(fromEmail), quote(subject))

    mail = imaplib.IMAP4_SSL("imap.outlook.com")
    try:
        mail.login(username,password)
        print("Login success..........")

        status, _ = mail.select("INBOX")
        if status != 'OK':
            raise imaplib.IMAP4.error("could not select INBOX")
        try:
            status, data = mail.search(None, query)
            found = status == 'OK' and data and data[0]
            message_ids = data[0].split() if found else []

            for num in message_ids:
                status, fetched = mail.fetch(num,'(RFC822)')
                # Skip messages that disappeared between SEARCH and FETCH.
                if status != 'OK' or not fetched:
                    continue
                if not isinstance(fetched[0], tuple):
                    continue
                # Parse bytes directly; the raw message is not necessarily UTF-8.
                message = email.message_from_bytes(fetched[0][1])

                # A missing header is None, which decode_header() rejects.
                email_from = str(make_header(decode_header(message['From'] or "")))
                subject_text = str(make_header(decode_header(message['Subject'] or "")))

                print("SUBJECT: "+ subject_text)
                print("FROM: "+ email_from)

                body = _extract_body(message, defaultEncoding)
                print("Printing the body:" + body)
        finally:
            # CLOSE is only legal while a mailbox is selected.
            mail.close()
    finally:
        mail.logout()


def _extract_body(message, fallback_encoding):
    """Return the message text: all text/plain parts, else tag-stripped HTML."""
    plain_parts = []
    html_parts = []

    # walk() visits the message itself when it is not multipart, every leaf
    # of nested multiparts, and the contents of forwarded message/rfc822 parts.
    for part in message.walk():
        if part.is_multipart():
            continue
        if "attachment" in str(part.get("Content-Disposition")).lower():
            continue
        content_type = part.get_content_type()
        if content_type not in ("text/plain", "text/html"):
            continue
        # decode=True undoes base64 / quoted-printable transfer encoding.
        payload = part.get_payload(decode=True)
        if payload is None:
            continue

        charset = part.get_content_charset() or fallback_encoding
        try:
            text = payload.decode(charset, errors="replace")
        except LookupError:
            # Declared charset is unknown to Python.
            text = payload.decode(fallback_encoding, errors="replace")

        if content_type == "text/plain":
            plain_parts.append(text)
        else:
            html_parts.append(text)

    if plain_parts:
        return "\n".join(plain_parts)
    # Strip HTML tags only from HTML; on plain text the same pattern would
    # delete things like "<user@example.com>".
    return re.sub('<[^<]+?>','',"\n".join(html_parts))

版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。