-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcapstone_project.py
More file actions
80 lines (67 loc) · 2.61 KB
/
capstone_project.py
File metadata and controls
80 lines (67 loc) · 2.61 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# Capstone Project: End-to-End Automation Pipeline
import os
import smtplib
from email.mime.application import MIMEApplication
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import pandas as pd
import requests
from bs4 import BeautifulSoup
from PyPDF2 import PdfMerger
def scrape_data(url, timeout=10):
    """Fetch *url* and extract product names and prices from the page.

    Every ``div.item`` element is searched for an ``h2.name`` and a
    ``span.price`` child. Items missing either child are skipped so that one
    malformed entry does not abort the whole scrape.

    Args:
        url: Address of the page to scrape.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``{'Name': ..., 'Price': ...}`` dicts, one per complete
        item, in document order.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server responds with an HTTP error status.
    """
    # Without a timeout, requests may block forever on an unresponsive host.
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    data = []
    for item in soup.select('div.item'):  # Adjust selector to match the website structure
        name_tag = item.select_one('h2.name')
        price_tag = item.select_one('span.price')
        # select_one() returns None when nothing matches; skip partial items.
        if name_tag is None or price_tag is None:
            continue
        data.append({
            'Name': name_tag.text.strip(),
            'Price': price_tag.text.strip(),
        })
    return data
def save_to_excel(data, file_path):
    """Write *data* to an Excel workbook at *file_path*.

    Args:
        data: Records accepted by ``pandas.DataFrame`` (e.g. a list of dicts).
        file_path: Destination of the workbook. When it is a filesystem path,
            missing parent directories are created first.
    """
    # Only path-like targets have a parent directory to prepare; anything
    # else is passed to pandas unchanged.
    if isinstance(file_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(file_path))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    df = pd.DataFrame(data)
    df.to_excel(file_path, index=False)
    print(f"Data saved to {file_path}")
def merge_pdfs(pdf_list, output_path):
    """Concatenate the PDFs in *pdf_list*, in order, into *output_path*.

    Args:
        pdf_list: Iterable of PDF sources accepted by ``PdfMerger.append``.
        output_path: Destination of the merged PDF.
    """
    merger = PdfMerger()
    try:
        for pdf in pdf_list:
            merger.append(pdf)
        merger.write(output_path)
    finally:
        # Always release the merger's resources, even when a source PDF
        # cannot be read or the output cannot be written.
        merger.close()
    print(f"PDFs merged into {output_path}")
def send_email_with_attachment(sender_email, receiver_email, password, subject, body,
                               attachment, smtp_server='smtp.example.com', smtp_port=587):
    """Send a plain-text email with one file attached over SMTP with STARTTLS.

    Args:
        sender_email: Address used for the ``From`` header and the SMTP login.
        receiver_email: Address used for the ``To`` header.
        password: Password for the SMTP login.
        subject: Subject line of the message.
        body: Plain-text body of the message.
        attachment: Path of the file to attach.
        smtp_server: Hostname of the SMTP server.
        smtp_port: Port of the SMTP server.
    """
    msg = MIMEMultipart()
    msg['From'] = sender_email
    msg['To'] = receiver_email
    msg['Subject'] = subject
    msg.attach(MIMEText(body, 'plain'))

    filename = os.path.basename(attachment)
    with open(attachment, 'rb') as file:
        # The attachment is arbitrary binary data, so it must be a
        # base64-encoded application part; MIMEText is only for text.
        part = MIMEApplication(file.read(), Name=filename)
    # Passing filename as a header parameter lets the email package quote or
    # encode names containing spaces or non-ASCII characters.
    part.add_header('Content-Disposition', 'attachment', filename=filename)
    msg.attach(part)

    with smtplib.SMTP(smtp_server, smtp_port) as server:
        server.starttls()
        server.login(sender_email, password)
        server.send_message(msg)
    print(f"Email sent to {receiver_email}")
# Main Execution
def main():
    """Run the pipeline: scrape, save to Excel, merge PDFs, email the result."""
    # base_dir was referenced without ever being defined (NameError on first
    # use); anchor all pipeline files next to this script.
    base_dir = os.path.dirname(os.path.abspath(__file__))
    project_dir = os.path.join(base_dir, 'Capstone_Project')
    # The output folder may not exist on a first run.
    os.makedirs(project_dir, exist_ok=True)

    # Step 1: Scrape data
    url = 'https://example.com/products'
    data = scrape_data(url)

    # Step 2: Save data to Excel
    excel_file = os.path.join(project_dir, 'scraped_data.xlsx')
    save_to_excel(data, excel_file)

    # Step 3: Merge PDFs
    pdfs = [os.path.join(project_dir, f'file{i}.pdf') for i in range(1, 3)]  # Example PDF paths
    merged_pdf = os.path.join(project_dir, 'merged_output.pdf')
    merge_pdfs(pdfs, merged_pdf)

    # Step 4: Send Email
    # NOTE: the addresses and password below are placeholders; supply real
    # credentials from a secure source rather than committing them here.
    send_email_with_attachment(
        sender_email='your_email@example.com',
        receiver_email='receiver_email@example.com',
        password='your_password',
        subject='Capstone Project Output',
        body='Please find the attached files.',
        attachment=excel_file,
    )
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()