From 4399945a7b99f04f2e0d5380776a72f842f07c67 Mon Sep 17 00:00:00 2001 From: cmahns Date: Wed, 4 Mar 2026 14:10:10 +0000 Subject: [PATCH] transfer @zwaxman's code from drive --- README.md | 39 +++++++ classify_members.py | 254 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 293 insertions(+) create mode 100644 README.md create mode 100644 classify_members.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..e62b263 --- /dev/null +++ b/README.md @@ -0,0 +1,39 @@ +# Member Classification Script + +Classifies members from an email list into three categories: +- **not-in-good-standing**: Status is not "Member in Good Standing" (including empty values) +- **existing-member**: In good standing and already in the member database +- **new-member**: In good standing but not yet in the database + +## Requirements + +Python 3.6+ (no external dependencies) + +## Usage + +### Basic Usage +```bash +python classify_members.py +``` + +Uses default files: `AllMembersEmails-20FEB2026.csv` and `nnjdsamembers.csv` + +### Custom Files +```bash +python classify_members.py --all-members your-file.csv --member-db your-db.csv +``` + +## Input Files + +**All Members File**: CSV with columns `email`, `membership_status`, `list_date` + +**Member Database File**: CSV with "Email address" column (first descriptive line is skipped) + +## Output + +Creates a folder named after the date extracted from the input filename (e.g., `20FEB2026/`) containing: +- `{DATE}_not-in-good-standing.csv` +- `{DATE}_existing-member.csv` +- `{DATE}_new-member.csv` + +All original columns are preserved in the output files. 
#!/usr/bin/env python3
"""
Member Classification Script

Classifies members from an email list into three categories:
- not-in-good-standing: Members whose status is not "Member in Good Standing"
- existing-member: Members in good standing who already exist in the member database
- new-member: Members in good standing who are not yet in the member database
"""

import csv
import argparse
import os
import re
from pathlib import Path
from collections import defaultdict


# Exact status text that marks a member as being in good standing.
_GOOD_STANDING_STATUS = 'Member in Good Standing'

# Header of the email column in the member database export.
_MEMBER_DB_EMAIL_COLUMN = 'Email address'

# Date token embedded in the input filename: 1-2 digits, 3 letters, 4 digits
# (e.g. "20FEB2026" or "5MAR2026"), matched case-insensitively.
_DATE_PATTERN = re.compile(r'(\d{1,2}[A-Z]{3}\d{4})', re.IGNORECASE)

# Errors that can realistically occur while reading or decoding a CSV file.
_READ_ERRORS = (OSError, UnicodeError, csv.Error)


def extract_date_from_filename(filename):
    """
    Extract date string from filename (e.g., "20FEB2026" from "AllMembersEmails-20FEB2026.csv").

    Args:
        filename: Input filename

    Returns:
        Upper-cased date string (e.g., "20FEB2026") or None if not found
    """
    match = _DATE_PATTERN.search(filename)
    return match.group(1).upper() if match else None


def load_member_emails(member_db_path):
    """
    Load email addresses from the member database CSV file.
    Skips the first descriptive line and reads from the header row.

    Problems (missing file, missing column, unreadable content) are reported
    on stdout and whatever was collected so far is returned, so a failure
    here yields an empty or partial set rather than an exception.

    Args:
        member_db_path: Path to the member database CSV file

    Returns:
        set: Set of email addresses (stripped and lowercased for
            case-insensitive matching)
    """
    member_emails = set()

    try:
        # newline='' lets the csv module handle quoted fields that contain
        # line breaks; utf-8-sig transparently drops a leading BOM.
        with open(member_db_path, 'r', encoding='utf-8-sig', newline='') as f:
            reader = csv.reader(f)
            # Skip the first descriptive line
            next(reader, None)
            # Read header row
            headers = next(reader, None)
            if not headers:
                print(f"Warning: {member_db_path} appears to be empty or malformed")
                return member_emails

            # Find the email column, tolerating stray whitespace in headers
            stripped_headers = [header.strip() for header in headers]
            try:
                email_col_index = stripped_headers.index(_MEMBER_DB_EMAIL_COLUMN)
            except ValueError:
                print(f"Error: 'Email address' column not found in {member_db_path}")
                print(f"Available columns: {headers}")
                return member_emails

            for row in reader:
                # Rows may be shorter than the header row; skip those.
                if len(row) <= email_col_index:
                    continue
                email = row[email_col_index].strip().lower()
                if email:
                    member_emails.add(email)

        print(f"Loaded {len(member_emails)} email addresses from member database")
        return member_emails

    except FileNotFoundError:
        print(f"Error: File not found: {member_db_path}")
        return member_emails
    except _READ_ERRORS as e:
        print(f"Error reading {member_db_path}: {e}")
        return member_emails


def classify_members(all_members_path, member_db_path):
    """
    Classify members from the email list into three categories.

    A row whose 'membership_status' is anything other than
    "Member in Good Standing" (including empty or missing) is
    not-in-good-standing. Remaining rows are split by whether their email
    (compared case-insensitively) is present in the member database.

    Args:
        all_members_path: Path to the all members email CSV file
        member_db_path: Path to the member database CSV file

    Returns:
        tuple: (not_in_good_standing, existing_members, new_members, date_string)
               Each of the first three is a list of dictionaries representing
               CSV rows. date_string is the date found in the filename, or
               "UNKNOWN_DATE" when none is found. On a read error or missing
               required columns the three lists are empty.
    """
    # Load existing member emails
    member_emails = load_member_emails(member_db_path)

    # Extract date from filename
    date_string = extract_date_from_filename(os.path.basename(all_members_path))
    if not date_string:
        print(f"Warning: Could not extract date from filename: {all_members_path}")
        date_string = "UNKNOWN_DATE"

    not_in_good_standing = []
    existing_members = []
    new_members = []

    try:
        # utf-8-sig: spreadsheet exports often start with a BOM, which would
        # otherwise turn the first header into '\ufeffemail'.
        with open(all_members_path, 'r', encoding='utf-8-sig', newline='') as f:
            reader = csv.DictReader(f)

            # fieldnames is None for an empty file; treat that as "no columns"
            fieldnames = reader.fieldnames or []
            if 'email' not in fieldnames or 'membership_status' not in fieldnames:
                print("Error: Required columns not found. Expected 'email' and 'membership_status'")
                print(f"Found columns: {fieldnames}")
                return [], [], [], date_string

            for row in reader:
                # DictReader stores None (not '') for cells missing from a
                # short row, so a .get() default alone is not enough.
                email = (row.get('email') or '').strip()
                membership_status = (row.get('membership_status') or '').strip()

                if membership_status != _GOOD_STANDING_STATUS:
                    not_in_good_standing.append(row)
                elif email.lower() in member_emails:
                    existing_members.append(row)
                else:
                    new_members.append(row)

        row_count = len(not_in_good_standing) + len(existing_members) + len(new_members)
        print(f"Processed {row_count} rows")
        print(f"  - Not in good standing: {len(not_in_good_standing)}")
        print(f"  - Existing members: {len(existing_members)}")
        print(f"  - New members: {len(new_members)}")

    except FileNotFoundError:
        print(f"Error: File not found: {all_members_path}")
        return [], [], [], date_string
    except _READ_ERRORS as e:
        print(f"Error reading {all_members_path}: {e}")
        return [], [], [], date_string

    return not_in_good_standing, existing_members, new_members, date_string


def write_csv_file(data, output_path, fieldnames):
    """
    Write data to a CSV file.

    Failures are reported on stdout instead of being raised, so one bad
    output file does not prevent the remaining files from being written.

    Args:
        data: List of dictionaries representing rows
        output_path: Path to output CSV file
        fieldnames: List of column names
    """
    try:
        with open(output_path, 'w', encoding='utf-8', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(data)
        print(f"  Written: {output_path} ({len(data)} rows)")
    except (OSError, ValueError, csv.Error) as e:
        # ValueError covers rows carrying keys that are not in fieldnames.
        print(f"Error writing {output_path}: {e}")


def main(argv=None):
    """
    Main function to run the member classification script.

    Args:
        argv: Optional list of command-line arguments; defaults to
            sys.argv[1:] when None.
    """
    parser = argparse.ArgumentParser(
        description='Classify members into not-in-good-standing, existing-member, and new-member categories'
    )
    parser.add_argument(
        '--all-members',
        default='AllMembersEmails-20FEB2026.csv',
        help='Path to the all members email CSV file (default: AllMembersEmails-20FEB2026.csv)'
    )
    parser.add_argument(
        '--member-db',
        default='nnjdsamembers.csv',
        help='Path to the member database CSV file (default: nnjdsamembers.csv)'
    )

    args = parser.parse_args(argv)

    print("=" * 60)
    print("Member Classification Script")
    print("=" * 60)
    print(f"All members file: {args.all_members}")
    print(f"Member database: {args.member_db}")
    print()

    # Classify members
    not_in_good_standing, existing_members, new_members, date_string = classify_members(
        args.all_members,
        args.member_db
    )

    # Column names come from the first row of the first non-empty category
    # (all rows come from the same DictReader, so they share one structure).
    populated = not_in_good_standing or existing_members or new_members
    if not populated:
        print("Warning: No data to write")
        return
    fieldnames = list(populated[0].keys())

    # Create the output directory only once there is something to write
    output_dir = Path(date_string)
    output_dir.mkdir(exist_ok=True)
    print(f"\nOutput directory: {output_dir}/")

    # Write output files
    print("\nWriting output files...")

    outputs = (
        (not_in_good_standing, 'not-in-good-standing'),
        (existing_members, 'existing-member'),
        (new_members, 'new-member'),
    )
    for rows, category in outputs:
        write_csv_file(rows, output_dir / f"{date_string}_{category}.csv", fieldnames)

    print("\n" + "=" * 60)
    print("Classification complete!")
    print("=" * 60)


if __name__ == '__main__':
    main()