# Use the command line with the URL of the school calendar, then any guest
# emails: -g email.com -g email.com
# python generate_links.py https://newsletters.naavi.com/i/JKdOO0M/issue-16/page/5 -g joanna.gilligan@gmail.com
import argparse
import html
import re
import sys
import urllib.parse
from datetime import date
from datetime import datetime

# NOTE: requests and bs4 are imported inside generate_calendar_links_from_rows
# so that the URL-building helpers stay importable without the scraping
# dependencies installed.

# Matches "DayOfWeek DayOfMonth Month" at the start of a row, e.g. "Sat 09 August".
_DATE_PREFIX_RE = re.compile(r'^\w{3}\s+\d{1,2}\s+\w+')

# The month may be spelled out ("August") or abbreviated ("Aug").
_DATE_FORMATS = ("%a %d %B %Y %H:%M", "%a %d %b %Y %H:%M")

# Rows carry no time of day, so every event is given this start time.
_DEFAULT_START_TIME = "07:00"

# Without a timeout, requests.get can block forever on an unresponsive server.
_REQUEST_TIMEOUT_SECONDS = 30

# NOTE(review): the page markup below was reconstructed; confirm it matches
# the intended layout of calendar_links.html.
_HTML_HEAD = (
    "<!DOCTYPE html>\n"
    "<html>\n"
    "<head>\n"
    '<meta charset="utf-8">\n'
    "<title>Calendar Links</title>\n"
    "</head>\n"
    "<body>\n"
    "<h1>Calendar Links</h1>"
)
_HTML_TAIL = "</body>\n</html>\n"


def create_google_calendar_url(title, start_time, end_time=None, description="", location="", guests=None):
    """
    Generates a Google Calendar "Add to Calendar" URL.

    Args:
        title (str): The title of the event.
        start_time (datetime): The start time of the event.
        end_time (datetime, optional): The end time of the event. When omitted,
            the start time is used as both start and end.
        description (str, optional): The event description. Defaults to "".
        location (str, optional): The event location. Defaults to "".
        guests (list, optional): A list of guest email addresses. Defaults to None.

    Returns:
        str: The Google Calendar URL.
    """
    base_url = "https://www.google.com/calendar/render?action=TEMPLATE"

    # Google Calendar expects YYYYMMDDTHHMMSS; no timezone suffix is added.
    start_time_str = start_time.strftime("%Y%m%dT%H%M%S")
    end_time_str = end_time.strftime("%Y%m%dT%H%M%S") if end_time else start_time_str

    params = {
        "text": title,
        "dates": f"{start_time_str}/{end_time_str}",
        "details": description,
        "location": location,
    }

    print(f" In create_google_calendar_url - guests: {guests}")
    # Guests are passed as a single comma-separated "add" parameter.
    if guests:
        params["add"] = ",".join(guests)

    encoded_params = urllib.parse.urlencode(params)
    return f"{base_url}&{encoded_params}"


def _parse_event_date(date_str_raw, year):
    """Parse a "Sat 09 August"-style string into a datetime in the given year.

    Raises:
        ValueError: If the string matches none of the supported date formats.
    """
    date_for_parsing = f"{date_str_raw} {year} {_DEFAULT_START_TIME}"
    print(f" String for parsing: '{date_for_parsing}'")

    first_error = None
    for date_format in _DATE_FORMATS:
        try:
            return datetime.strptime(date_for_parsing, date_format)
        except ValueError as e:
            # Report the full-month-name failure, which is the primary format.
            if first_error is None:
                first_error = e
    raise first_error


def _extract_events(row_texts, year, guests=None):
    """Build a list of {"title", "url"} dicts from the text of each table row.

    Rows that do not start with a date are ignored; rows whose date cannot be
    parsed are reported and skipped.
    """
    events = []
    for row_number, row_text in enumerate(row_texts, start=1):
        date_match = _DATE_PREFIX_RE.match(row_text)
        if not date_match:
            continue

        date_str_raw = date_match.group(0)
        print(f"Processing row {row_number}:")
        print(f" Raw date string found: '{date_str_raw}'")

        try:
            event_datetime = _parse_event_date(date_str_raw, year)
        except ValueError as e:
            print(f"Could not parse date string '{date_str_raw}' in row {row_number}: {e}")
            continue
        print(f" Parsed datetime object: {event_datetime}")

        # Everything after the date (skipping the one separator character
        # that follows it) is treated as the title.
        title = row_text[date_match.end() + 1:].strip() or "Event"

        calendar_url = create_google_calendar_url(
            title=title,
            start_time=event_datetime,
            guests=guests,
        )
        print(f" Generated Calendar URL (start_time part): {calendar_url.split('&dates=')[1].split('&')[0]}")
        print("-" * 20)
        print(f" Full generated Calendar URL: {calendar_url}")

        events.append({"title": title, "url": calendar_url})
    return events


def _render_page(body_parts):
    """Wrap the given HTML fragments in the page header and footer."""
    return "\n".join([_HTML_HEAD, *body_parts, _HTML_TAIL])


def generate_calendar_links_from_rows(url, guests=None):
    """
    Fetches a webpage, looks for rows with a specific date format and title,
    and generates an HTML string with Google Calendar links.

    Args:
        url (str): The URL of the webpage.
        guests (list, optional): Guest email addresses added to every event.

    Returns:
        str: An HTML string with calendar links, or None if an error occurs.
    """
    import requests
    from bs4 import BeautifulSoup

    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()  # Raise an exception for bad status codes
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the URL: {e}")
        return None

    if not response.content:
        print("Error: Received empty content from the URL.")
        return None

    try:
        soup = BeautifulSoup(response.content, 'html.parser')
    except Exception as e:  # best effort: report any parser failure and give up
        print(f"Error parsing HTML content: {e}")
        return None

    # NOTE(review): assumes events live in <tr> elements — confirm against the page.
    rows = soup.find_all("tr")
    if not rows:
        return _render_page(["<p>No table rows found on the page.</p>"])

    # Non-breaking spaces are normalised so the date pattern can match.
    row_texts = (row.get_text(" ", strip=True).replace('\xa0', ' ') for row in rows)

    # NOTE(review): rows carry no year, so the current year is assumed; dates
    # that belong to the next calendar year will be wrong.
    events = _extract_events(row_texts, date.today().year, guests)

    if not events:
        return _render_page(["<p>No valid events with the specified date format found in the rows.</p>"])

    # Titles come from a remote page and URLs contain "&", so both are escaped.
    links = [
        f'<p><a href="{html.escape(event["url"], quote=True)}">{html.escape(event["title"])}</a></p>'
        for event in events
    ]
    return _render_page(links)


def main(argv=None):
    """Parse the command line, generate the links page and save it to disk.

    Returns:
        int: 0 when calendar_links.html was written, 1 when generation failed.
    """
    parser = argparse.ArgumentParser(description="Generate Google Calendar links from a webpage table.")
    parser.add_argument("url", help="The URL of the webpage containing the event table.")
    parser.add_argument("-g", "--guest", action="append", help="Add a guest email address. Can be used multiple times.")
    args = parser.parse_args(argv)

    print(f"Parsed arguments (args): {args}")
    print(f"Guest emails variable (guest_emails): {args.guest}")

    # args.guest is a list of guest emails, or None when -g was not given.
    generated_html = generate_calendar_links_from_rows(args.url, guests=args.guest)
    if not generated_html:
        return 1

    # Explicit encoding: event titles may contain non-ASCII characters.
    with open("calendar_links.html", "w", encoding="utf-8") as f:
        f.write(generated_html)
    print("\nHTML output saved to calendar_links.html")
    return 0


if __name__ == "__main__":
    sys.exit(main())