Initial commit
This commit is contained in:
1
calendar_links.html
Normal file
1
calendar_links.html
Normal file
@ -0,0 +1 @@
|
||||
<h1>Calendar Links</h1><ul><li>No valid events with the specified date format found in the rows.</li></ul>
|
||||
197
generate_links.py
Normal file
197
generate_links.py
Normal file
@ -0,0 +1,197 @@
|
||||
# Usage: run from the command line with the URL of the school calendar, followed by any guest emails, each given with its own -g flag (e.g. -g guest1@example.com -g guest2@example.com)
|
||||
# python generate_links.py https://newsletters.naavi.com/i/JKdOO0M/issue-16/page/5 -g joanna.gilligan@gmail.com
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from datetime import datetime
|
||||
import urllib.parse
|
||||
from datetime import date
|
||||
import sys
|
||||
import argparse
|
||||
import re
|
||||
|
||||
def create_google_calendar_url(title, start_time, end_time=None, description="", location="", guests=None):
    """
    Generates a Google Calendar "Add to Calendar" URL.

    Args:
        title (str): The title of the event.
        start_time (datetime): The start time of the event.
        end_time (datetime, optional): The end time of the event. When omitted
            (or falsy), the start time is used as the end as well, producing a
            zero-length event. Defaults to None.
        description (str, optional): The event description. Defaults to "".
        location (str, optional): The event location. Defaults to "".
        guests (list or str, optional): Guest email addresses. A single address
            may be passed as a plain string. Blank entries are ignored and
            surrounding whitespace is stripped. Defaults to None.

    Returns:
        str: The Google Calendar URL.
    """
    base_url = "https://www.google.com/calendar/render?action=TEMPLATE"

    # Timestamps are rendered as YYYYMMDDTHHMMSS; no trailing "Z" is appended.
    stamp_format = "%Y%m%dT%H%M%S"
    start_time_str = start_time.strftime(stamp_format)
    # Single-time events reuse the start as the end (start/start).
    end_time_str = end_time.strftime(stamp_format) if end_time else start_time_str

    params = {
        "text": title,
        "dates": f"{start_time_str}/{end_time_str}",
        "details": description,
        "location": location,
    }

    print(f" In create_google_calendar_url - guests: {guests}")

    # A bare string is iterable, so joining it directly would put a comma
    # between every character; treat it as a one-element guest list instead.
    if isinstance(guests, str):
        guests = [guests]

    # Drop empty/blank entries so the "add" parameter never contains stray commas.
    guest_list = [guest.strip() for guest in (guests or []) if guest and guest.strip()]
    if guest_list:
        params["add"] = ",".join(guest_list)

    # urlencode percent-encodes every value (spaces become "+", "/" becomes "%2F").
    return f"{base_url}&{urllib.parse.urlencode(params)}"
|
||||
|
||||
def generate_calendar_links_from_rows(url, guests=None):
    """
    Fetches a webpage, looks for table rows that begin with a date such as
    "Sat 09 August", and generates an HTML string with Google Calendar links.

    Each matching row becomes one event: the text after the date is used as
    the title (or "Event" when nothing follows the date), the year is taken
    to be the current year, and the start time defaults to 07:00.

    Args:
        url (str): The URL of the webpage.
        guests (list, optional): Guest email addresses, passed through to
            create_google_calendar_url for every event. Defaults to None.

    Returns:
        str: An HTML string with calendar links, or None if an error occurs
            while fetching or parsing the page.
    """
    # Local import: only needed to escape the markup generated below.
    import html

    try:
        # Without a timeout an unresponsive server would hang the script forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # Raise an exception for bad status codes
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the URL: {e}")
        return None

    if not response.content:
        print("Error: Received empty content from the URL.")
        return None

    try:
        soup = BeautifulSoup(response.content, 'html.parser')
    except Exception as e:
        print(f"Error parsing HTML content: {e}")
        return None

    html_output = "<h1>Calendar Links</h1><ul>"
    events = []
    # NOTE: the page rows carry no year, so every event is placed in the current year.
    current_year = date.today().year

    # Find all table rows (<tr>) on the page
    rows = soup.find_all('tr')
    if not rows:
        return html_output + "<li>No table rows found on the page.</li></ul>"

    # "DayOfWeek DayOfMonth Month" at the start of the row, e.g. "Sat 09 August".
    # Compiled once here rather than on every loop iteration.
    date_pattern = re.compile(r'^\w{3}\s+\d{1,2}\s+\w+')

    for i, row in enumerate(rows):
        # Join the cell texts with spaces and normalise non-breaking spaces.
        row_text = row.get_text(" ", strip=True).replace('\xa0', ' ')

        date_match = date_pattern.match(row_text)
        if not date_match:
            continue

        date_str_raw = date_match.group(0)
        print(f"Processing row {i+1}:")
        print(f" Raw date string found: '{date_str_raw}'")

        # Append the current year and a default 07:00 start time so the
        # string can be parsed into a full datetime.
        date_for_parsing = f"{date_str_raw} {current_year} 07:00"
        print(f" String for parsing: '{date_for_parsing}'")

        try:
            event_datetime = datetime.strptime(date_for_parsing, "%a %d %B %Y %H:%M")
        except ValueError as e:
            print(f"Could not parse date string '{date_str_raw}' in row {i+1}: {e}")
            continue  # Skip this row if date parsing fails

        print(f" Parsed datetime object: {event_datetime}")

        # The title is everything after the date and the single separator
        # character that follows it; fall back to "Event" when that is empty.
        title = row_text[len(date_str_raw) + 1:].strip() or "Event"

        calendar_url = create_google_calendar_url(
            title=title,
            start_time=event_datetime,
            guests=guests,  # Pass the guest emails if provided
        )
        print(f" Generated Calendar URL (start_time part): {calendar_url.split('&dates=')[1].split('&')[0]}")
        print("-" * 20)  # Separator for clarity
        print(f" Full generated Calendar URL: {calendar_url}")

        events.append({"title": title, "url": calendar_url})

    if not events:
        html_output += "<li>No valid events with the specified date format found in the rows.</li>"
    else:
        items = []
        for event in events:
            # Titles come from the scraped page and the URL contains "&", so
            # both must be escaped before being placed into the markup.
            safe_url = html.escape(event["url"], quote=True)
            safe_title = html.escape(event["title"])
            items.append(f'<li><a href="{safe_url}" target="_blank">{safe_title}</a></li>')
        html_output += "".join(items)

    html_output += "</ul>"

    return html_output
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: parse the page URL and optional guest emails,
    # build the calendar-link HTML and save it to calendar_links.html.
    parser = argparse.ArgumentParser(description="Generate Google Calendar links from a webpage table.")
    parser.add_argument("url", help="The URL of the webpage containing the event table.")
    parser.add_argument("-g", "--guest", action="append", help="Add a guest email address. Can be used multiple times.")

    args = parser.parse_args()

    print(f"Parsed arguments (args): {args}")
    print(f"Guest emails variable (guest_emails): {args.guest}")

    target_url = args.url
    guest_emails = args.guest  # A list of guest emails, or None when -g was not given

    generated_html = generate_calendar_links_from_rows(target_url, guests=guest_emails)

    if not generated_html:
        # The function has already printed the reason; signal failure to the shell.
        sys.exit(1)

    # Write pure ASCII and turn any non-ASCII character (e.g. in an event
    # title) into an HTML numeric character reference. This avoids both a
    # UnicodeEncodeError under a narrow platform default encoding and
    # mis-decoding by the browser, since the page declares no charset.
    with open("calendar_links.html", "w", encoding="ascii", errors="xmlcharrefreplace") as f:
        f.write(generated_html)
    print("\nHTML output saved to calendar_links.html")
|
||||
Reference in New Issue
Block a user