From 60959a0d76d73dc813d10f5270818d0369c09591 Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 29 May 2025 13:10:05 +1000 Subject: [PATCH] Initial commit --- calendar_links.html | 1 + generate_links.py | 197 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 198 insertions(+) create mode 100644 calendar_links.html create mode 100644 generate_links.py diff --git a/calendar_links.html b/calendar_links.html new file mode 100644 index 0000000..320886a --- /dev/null +++ b/calendar_links.html @@ -0,0 +1 @@ +

Calendar Links

"""Generate Google Calendar "add event" links from a school calendar web page.

This is the content of generate_links.py, the file added by this patch.

Usage: pass the URL of the school calendar, then any guest emails, each with -g:

    python generate_links.py https://newsletters.naavi.com/i/JKdOO0M/issue-16/page/5 -g guest@example.com
"""
import argparse
import html
import re
import sys
import urllib.parse
from datetime import date, datetime

# Matches "DayOfWeek DayOfMonth Month" at the start of a row, e.g. "Sat 09 August".
# Compiled once here instead of re-importing `re` on every loop iteration.
_DATE_PREFIX_RE = re.compile(r'^\w{3}\s+\d{1,2}\s+\w+')

# The rows carry no time of day, so every event is given this start time.
_DEFAULT_START_TIME = "07:00"

# Seconds to wait for the calendar page before giving up.
_REQUEST_TIMEOUT = 30


def create_google_calendar_url(title, start_time, end_time=None, description="", location="", guests=None):
    """
    Generates a Google Calendar "Add to Calendar" URL.

    Args:
        title (str): The title of the event.
        start_time (datetime): The start time of the event.
        end_time (datetime, optional): The end time of the event. When omitted,
            the start time is used as the end time as well.
        description (str, optional): The event description. Defaults to "".
        location (str, optional): The event location. Defaults to "".
        guests (list or str, optional): Guest email addresses. A single address
            may be passed as a plain string. Defaults to None (no guests).

    Returns:
        str: The Google Calendar URL.
    """
    base_url = "https://www.google.com/calendar/render?action=TEMPLATE"

    # Google Calendar expects YYYYMMDDTHHMMSS for both ends of the "dates" range.
    time_format = "%Y%m%dT%H%M%S"
    start_str = start_time.strftime(time_format)
    end_str = end_time.strftime(time_format) if end_time is not None else start_str

    params = {
        "text": title,
        "dates": f"{start_str}/{end_str}",
        "details": description,
        "location": location,
    }

    # A bare string would otherwise be joined character by character.
    if isinstance(guests, str):
        guests = [guests]
    guest_list = [g.strip() for g in (guests or []) if g and g.strip()]
    if guest_list:
        params["add"] = ",".join(guest_list)

    return f"{base_url}&{urllib.parse.urlencode(params)}"


def _parse_event_row(row_text, year):
    """Return ``(title, start_datetime)`` for a row, or None if it has no date prefix.

    Raises ValueError when the prefix looks like a date but cannot be parsed
    (for example an abbreviated month name or a day that does not exist).
    """
    date_match = _DATE_PREFIX_RE.match(row_text)
    if date_match is None:
        return None

    # The row has no year or time, so supply them before parsing.
    start = datetime.strptime(
        f"{date_match.group(0)} {year} {_DEFAULT_START_TIME}",
        "%a %d %B %Y %H:%M",
    )

    # Everything after the date is the title; drop a leading ":" or dash separator.
    title = row_text[date_match.end():].strip().lstrip(":-\u2013\u2014").strip() or "Event"
    return title, start


def _render_links_html(events):
    """Render the events (dicts with "title" and "url") as an HTML page of links."""
    if events:
        # Titles come from an external page and URLs contain "&": escape both.
        items = [
            '<li><a href="{url}">{title}</a></li>'.format(
                url=html.escape(event["url"], quote=True),
                title=html.escape(event["title"]),
            )
            for event in events
        ]
    else:
        items = ["<li>No valid events with the specified date format found in the rows.</li>"]

    # NOTE(review): the original markup around "Calendar Links" was lost when this
    # patch was extracted; this wrapper is a reconstruction -- confirm against the repo.
    return (
        '<!DOCTYPE html>\n<html>\n<head><meta charset="utf-8"><title>Calendar Links</title></head>\n'
        "<body>\n<h1>Calendar Links</h1>\n<ul>\n"
        + "\n".join(items)
        + "\n</ul>\n</body>\n</html>\n"
    )


def generate_calendar_links_from_rows(url, guests=None):
    """
    Fetches a webpage, looks for rows with a specific date format and title,
    and generates an HTML string with Google Calendar links.

    Args:
        url (str): The URL of the webpage.
        guests (list, optional): Guest email addresses added to every event.

    Returns:
        str: An HTML string with calendar links, or None if an error occurs.
    """
    # Third-party packages are imported here because only the fetch needs them;
    # the URL and HTML helpers above stay importable with the standard library alone.
    import requests
    from bs4 import BeautifulSoup

    try:
        # Without a timeout an unresponsive server would hang the script forever.
        response = requests.get(url, timeout=_REQUEST_TIMEOUT)
        response.raise_for_status()  # Raise an exception for bad status codes
    except requests.exceptions.RequestException as e:
        print(f"Error fetching the URL: {e}")
        return None

    if not response.content:
        print("Error: Received empty content from the URL.")
        return None

    try:
        soup = BeautifulSoup(response.content, 'html.parser')
    except Exception as e:
        print(f"Error parsing HTML content: {e}")
        return None

    # NOTE(review): the statements that originally defined `rows` and
    # `current_year` were lost when this patch was extracted. Selecting <tr>
    # elements and using today's year are assumptions -- confirm against the repo.
    rows = soup.find_all("tr")
    current_year = date.today().year

    events = []
    for row_number, row in enumerate(rows, start=1):
        # Replace non-breaking spaces so the date pattern and title split cleanly.
        row_text = row.get_text(" ", strip=True).replace('\xa0', ' ')

        try:
            parsed = _parse_event_row(row_text, current_year)
        except ValueError as e:
            print(f"Could not parse date string in row {row_number} ('{row_text}'): {e}")
            continue  # Skip this row if date parsing fails
        if parsed is None:
            continue  # Not an event row

        title, event_datetime = parsed
        calendar_url = create_google_calendar_url(
            title=title,
            start_time=event_datetime,
            guests=guests,
        )
        print(f"Row {row_number}: {event_datetime:%a %d %b %Y} - {title}")
        print(f"  Full generated Calendar URL: {calendar_url}")
        events.append({"title": title, "url": calendar_url})

    return _render_links_html(events)


def main(argv=None):
    """Parse the command line, build the links page, and write it to disk.

    Returns the process exit status: 0 on success, 1 if the page could not be
    fetched or parsed.
    """
    parser = argparse.ArgumentParser(description="Generate Google Calendar links from a webpage table.")
    parser.add_argument("url", help="The URL of the webpage containing the event table.")
    parser.add_argument("-g", "--guest", action="append", help="Add a guest email address. Can be used multiple times.")
    parser.add_argument("-o", "--output", default="calendar_links.html", help="File to write the HTML to (default: calendar_links.html).")
    args = parser.parse_args(argv)

    # args.guest is a list of guest emails, or None when -g was not given.
    generated_html = generate_calendar_links_from_rows(args.url, guests=args.guest)
    if not generated_html:
        return 1

    # Event titles may contain non-ASCII text, so do not rely on the platform default encoding.
    with open(args.output, "w", encoding="utf-8") as f:
        f.write(generated_html)
    print(f"\nHTML output saved to {args.output}")
    return 0


if __name__ == "__main__":
    sys.exit(main())