# Usage: pass the school calendar URL first, then zero or more guest emails, each preceded by -g (e.g. -g guest1@example.com -g guest2@example.com).
# python generate_links.py https://newsletters.naavi.com/i/JKdOO0M/issue-16/page/5 -g joanna.gilligan@gmail.com
import requests
from bs4 import BeautifulSoup
from datetime import datetime
import urllib.parse
from datetime import date
import sys
import argparse
import re
def create_google_calendar_url(title, start_time, end_time=None, description="", location="", guests=None):
    """
    Generates a Google Calendar "Add to Calendar" URL.

    Args:
        title (str): The title of the event.
        start_time (datetime): The start time of the event.
        end_time (datetime, optional): The end time of the event. When omitted
            (or falsy) the start time is reused as the end, which yields a
            zero-length event.
        description (str, optional): The event description. Defaults to "".
        location (str, optional): The event location. Defaults to "".
        guests (list or str, optional): Guest email addresses. A single
            address may be given as a plain string. Blank entries are ignored.
            Defaults to None.

    Returns:
        str: The Google Calendar URL with all parameters URL-encoded.
    """
    base_url = "https://www.google.com/calendar/render?action=TEMPLATE"

    # Timestamps are emitted as YYYYMMDDTHHMMSS with no "Z"/offset suffix, so
    # whatever wall-clock value the datetime carries is passed through as-is.
    stamp_format = "%Y%m%dT%H%M%S"
    start_stamp = start_time.strftime(stamp_format)
    end_stamp = end_time.strftime(stamp_format) if end_time else start_stamp

    params = {
        "text": title,
        "dates": f"{start_stamp}/{end_stamp}",
        "details": description,
        "location": location,
    }

    print(f" In create_google_calendar_url - guests: {guests}")

    # A bare string is iterable, so joining it directly would interleave commas
    # between its characters ("a,@,b,..."); treat it as a one-element list.
    if isinstance(guests, str):
        guests = [guests]
    guest_list = [guest.strip() for guest in (guests or ()) if guest and guest.strip()]
    if guest_list:
        params["add"] = ",".join(guest_list)

    # base_url already carries a query string, hence "&" rather than "?".
    return f"{base_url}&{urllib.parse.urlencode(params)}"
# Matches a leading "DayOfWeek DayOfMonth Month" prefix, e.g. "Sat 09 August".
_ROW_DATE_RE = re.compile(r'^\w{3}\s+\d{1,2}\s+\w+')

# Characters that may separate the date prefix from the event title in a row.
_TITLE_SEPARATORS = " \t:;,-|"

# The rows are parsed for a date only, so every event gets this start time.
_DEFAULT_EVENT_TIME = "07:00"


def _render_links_page(events, empty_message):
    """Render the events (or `empty_message` when there are none) as an HTML page."""
    # Function-scope import: `html` is not among this file's top-level imports.
    from html import escape

    # NOTE(review): the markup below is a reconstruction -- the HTML tags in the
    # previous string literals were missing. Confirm it matches the intended layout.
    if events:
        items = []
        for event in events:
            # Titles come from a remote page and URLs contain "&"; escape both.
            href = escape(event["url"], quote=True)
            label = escape(event["title"])
            items.append(f'<li><a href="{href}" target="_blank" rel="noopener">{label}</a></li>')
    else:
        items = [f"<li>{escape(empty_message)}</li>"]

    page_lines = [
        "<!DOCTYPE html>",
        "<html>",
        "<head>",
        '<meta charset="utf-8">',
        "<title>Calendar Links</title>",
        "</head>",
        "<body>",
        "<h1>Calendar Links</h1>",
        "<ul>",
        *items,
        "</ul>",
        "</body>",
        "</html>",
    ]
    return "\n".join(page_lines) + "\n"


def generate_calendar_links_from_rows(url, guests=None, timeout=30):
    """
    Fetches a webpage, looks for table rows that start with a date such as
    "Sat 09 August" followed by a title, and generates an HTML page with one
    Google Calendar link per event.

    The rows carry no year or time of day, so each event is placed in the
    current calendar year at 07:00.

    Args:
        url (str): The URL of the webpage.
        guests (list, optional): Guest email addresses forwarded to
            create_google_calendar_url for every event. Defaults to None.
        timeout (float, optional): Passed to requests.get as its timeout so a
            stalled server cannot hang the script. Defaults to 30.

    Returns:
        str: An HTML page with calendar links (or a single explanatory list
        item when no rows/events were found), or None if fetching or parsing
        the page fails.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for bad status codes
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the URL: {e}")
        return None

    if not response.content:
        print("Error: Received empty content from the URL.")
        return None

    try:
        soup = BeautifulSoup(response.content, 'html.parser')
    except Exception as e:  # deliberate best-effort: report and give up on this page
        print(f"Error parsing HTML content: {e}")
        return None

    rows = soup.find_all('tr')
    if not rows:
        return _render_links_page([], "No table rows found on the page.")

    current_year = date.today().year
    events = []
    for i, row in enumerate(rows):
        # Join cell texts with a space and normalise non-breaking spaces so the
        # date pattern and the title split behave predictably.
        row_text = row.get_text(" ", strip=True).replace('\xa0', ' ')

        date_match = _ROW_DATE_RE.match(row_text)
        if not date_match:
            continue

        date_str_raw = date_match.group(0)
        print(f"Processing row {i+1}:")
        print(f" Raw date string found: '{date_str_raw}'")

        date_for_parsing = f"{date_str_raw} {current_year} {_DEFAULT_EVENT_TIME}"
        print(f" String for parsing: '{date_for_parsing}'")

        # Only strptime is expected to raise here; keep the try body to that call.
        # %a / %B match weekday and month names of the active locale.
        try:
            event_datetime = datetime.strptime(date_for_parsing, "%a %d %B %Y %H:%M")
        except ValueError as e:
            print(f"Could not parse date string '{date_str_raw}' in row {i+1}: {e}")
            continue  # Skip this row if date parsing fails
        print(f" Parsed datetime object: {event_datetime}")

        # Everything after the date prefix is the title; drop any separator
        # characters left between the two and fall back to a generic name.
        title = row_text[date_match.end():].lstrip(_TITLE_SEPARATORS).strip() or "Event"

        calendar_url = create_google_calendar_url(
            title=title,
            start_time=event_datetime,
            guests=guests,  # Pass the guest emails if provided
        )
        print(f" Generated Calendar URL (start_time part): {calendar_url.split('&dates=')[1].split('&')[0]}")
        print("-" * 20)  # Separator for clarity
        print(f" Full generated Calendar URL: {calendar_url}")
        events.append({"title": title, "url": calendar_url})

    return _render_links_page(
        events,
        "No valid events with the specified date format found in the rows.",
    )
def main(argv=None):
    """
    Command-line entry point: build calendar links for a page and save them.

    Args:
        argv (list, optional): Argument list to parse instead of sys.argv[1:].
            Defaults to None, which lets argparse read the real command line.

    Returns:
        int: 0 when calendar_links.html was written, 1 when no HTML was
        generated (the generator has already printed the reason).
    """
    parser = argparse.ArgumentParser(description="Generate Google Calendar links from a webpage table.")
    parser.add_argument("url", help="The URL of the webpage containing the event table.")
    parser.add_argument("-g", "--guest", action="append", help="Add a guest email address. Can be used multiple times.")
    args = parser.parse_args(argv)

    print(f"Parsed arguments (args): {args}")
    print(f"Guest emails variable (guest_emails): {args.guest}")

    target_url = args.url
    guest_emails = args.guest  # A list of guest emails, or None when -g was not used

    generated_html = generate_calendar_links_from_rows(target_url, guests=guest_emails)
    if not generated_html:
        # Signal failure to the shell instead of exiting with status 0.
        return 1

    # Explicit UTF-8: event titles may contain characters that the platform's
    # default encoding cannot represent.
    with open("calendar_links.html", "w", encoding="utf-8") as f:
        f.write(generated_html)
    print("\nHTML output saved to calendar_links.html")
    return 0


if __name__ == "__main__":
    sys.exit(main())