transfer @zwaxman's code from drive
This commit is contained in:
39
README.md
Normal file
39
README.md
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
# Member Classification Script
|
||||||
|
|
||||||
|
Classifies members from an email list into three categories:
|
||||||
|
- **not-in-good-standing**: Status is not "Member in Good Standing" (including empty values)
|
||||||
|
- **existing-member**: In good standing and already in the member database
|
||||||
|
- **new-member**: In good standing but not yet in the database
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
Python 3.6+ (no external dependencies)
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
### Basic Usage
|
||||||
|
```bash
|
||||||
|
python classify_members.py
|
||||||
|
```
|
||||||
|
|
||||||
|
Uses default files: `AllMembersEmails-20FEB2026.csv` and `nnjdsamembers.csv`
|
||||||
|
|
||||||
|
### Custom Files
|
||||||
|
```bash
|
||||||
|
python classify_members.py --all-members your-file.csv --member-db your-db.csv
|
||||||
|
```
|
||||||
|
|
||||||
|
## Input Files
|
||||||
|
|
||||||
|
**All Members File**: CSV with columns `email`, `membership_status`, `list_date`
|
||||||
|
|
||||||
|
**Member Database File**: CSV with "Email address" column (first descriptive line is skipped)
|
||||||
|
|
||||||
|
## Output
|
||||||
|
|
||||||
|
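Creates a folder in the current working directory named after the date extracted from the input filename (e.g., `20FEB2026/`), or `UNKNOWN_DATE/` if the filename contains no recognizable date, containing: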
|
||||||
|
- `{DATE}_not-in-good-standing.csv`
|
||||||
|
- `{DATE}_existing-member.csv`
|
||||||
|
- `{DATE}_new-member.csv`
|
||||||
|
|
||||||
|
All original columns are preserved in the output files.
|
||||||
254
classify_members.py
Normal file
254
classify_members.py
Normal file
@@ -0,0 +1,254 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Member Classification Script
|
||||||
|
|
||||||
|
Classifies members from an email list into three categories:
|
||||||
|
- not-in-good-standing: Members whose status is not "Member in Good Standing"
|
||||||
|
- existing-member: Members in good standing who already exist in the member database
|
||||||
|
- new-member: Members in good standing who are not yet in the member database
|
||||||
|
"""
|
||||||
|
|
||||||
|
import csv
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
|
||||||
|
def extract_date_from_filename(filename):
    """
    Extract a date token such as "20FEB2026" from a filename.

    The token is one or two digits, three ASCII letters (any case) and four
    digits, e.g. "20FEB2026" in "AllMembersEmails-20FEB2026.csv". The three
    letters are not checked against real month abbreviations.

    Args:
        filename: Input filename (string)

    Returns:
        The first matching token, upper-cased (e.g. "20FEB2026"), or None
        if the filename contains no such token
    """
    # The digit look-arounds stop the token from being carved out of a longer
    # digit run ("120FEB2026", "20FEB20261"), which would otherwise produce a
    # plausible-looking but wrong date. re.ASCII limits \d and the
    # case-insensitive letter class to plain ASCII characters.
    pattern = r'(?<!\d)(\d{1,2}[A-Z]{3}\d{4})(?!\d)'
    match = re.search(pattern, filename, re.IGNORECASE | re.ASCII)
    if match:
        return match.group(1).upper()
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def load_member_emails(member_db_path):
    """
    Load email addresses from the member database CSV file.

    The file is read as: one descriptive line (skipped), then a header row
    that must contain an "Email address" column, then data rows.

    Args:
        member_db_path: Path to the member database CSV file

    Returns:
        set: Email addresses, stripped and lower-cased for case-insensitive
        matching. The set is empty when the file is missing, unreadable,
        has no header row, or has no "Email address" column; a message is
        printed in each of those cases instead of raising.
    """
    member_emails = set()

    try:
        # utf-8-sig drops a leading byte-order mark if one is present;
        # newline='' lets the csv module itself interpret line endings so
        # quoted fields containing newlines are parsed correctly.
        with open(member_db_path, 'r', encoding='utf-8-sig', newline='') as f:
            reader = csv.reader(f)
            # Skip the first descriptive line
            next(reader, None)
            # Read header row
            headers = next(reader, None)
            if not headers:
                print(f"Warning: {member_db_path} appears to be empty or malformed")
                return member_emails

            # Find the "Email address" column. An exact match wins; otherwise
            # fall back to a match that ignores surrounding whitespace and
            # letter case in the header cell.
            if 'Email address' in headers:
                email_col_index = headers.index('Email address')
            else:
                normalized = [name.strip().lower() for name in headers]
                if 'email address' in normalized:
                    email_col_index = normalized.index('email address')
                else:
                    print(f"Error: 'Email address' column not found in {member_db_path}")
                    print(f"Available columns: {headers}")
                    return member_emails

            # Read all email addresses; rows too short to have the column
            # and blank cells are skipped.
            for row in reader:
                if len(row) > email_col_index:
                    email = row[email_col_index].strip().lower()
                    if email:
                        member_emails.add(email)

        print(f"Loaded {len(member_emails)} email addresses from member database")
        return member_emails

    except FileNotFoundError:
        print(f"Error: File not found: {member_db_path}")
        return member_emails
    except Exception as e:
        # Deliberately best-effort: report the problem and return whatever
        # was collected rather than aborting the caller.
        print(f"Error reading {member_db_path}: {e}")
        return member_emails
|
||||||
|
|
||||||
|
|
||||||
|
def classify_members(all_members_path, member_db_path):
    """
    Classify members from the email list into three categories.

    A row is "not in good standing" when its stripped membership_status is
    anything other than "Member in Good Standing" (including empty). Rows in
    good standing are "existing" when their lower-cased email is in the
    member database, and "new" otherwise.

    Args:
        all_members_path: Path to the all members email CSV file
        member_db_path: Path to the member database CSV file

    Returns:
        tuple: (not_in_good_standing, existing_members, new_members, date_string)
        The first three are lists of dictionaries representing CSV rows.
        date_string is taken from the input filename, or "UNKNOWN_DATE" when
        no date is found. On a read error or missing required columns the
        three lists are empty and a message is printed.
    """
    # Load existing member emails
    member_emails = load_member_emails(member_db_path)

    # Extract date from filename
    date_string = extract_date_from_filename(os.path.basename(all_members_path))
    if not date_string:
        print(f"Warning: Could not extract date from filename: {all_members_path}")
        date_string = "UNKNOWN_DATE"

    # Classify members
    not_in_good_standing = []
    existing_members = []
    new_members = []

    try:
        # utf-8-sig strips a byte-order mark that would otherwise become part
        # of the first column name ('\ufeffemail'); newline='' is what the csv
        # module expects so it can handle newlines inside quoted fields.
        with open(all_members_path, 'r', encoding='utf-8-sig', newline='') as f:
            reader = csv.DictReader(f)

            # Verify required columns exist. fieldnames is None for an empty
            # file, so substitute an empty list for the membership tests.
            fieldnames = reader.fieldnames or []
            if 'email' not in fieldnames or 'membership_status' not in fieldnames:
                print("Error: Required columns not found. Expected 'email' and 'membership_status'")
                print(f"Found columns: {reader.fieldnames}")
                return [], [], [], date_string

            row_count = 0
            for row in reader:
                row_count += 1
                # DictReader fills cells missing from a short row with None,
                # so a default passed to .get() is not enough; coerce None to
                # '' before stripping.
                email = (row.get('email') or '').strip()
                membership_status = (row.get('membership_status') or '').strip()

                # Check membership status
                if membership_status != 'Member in Good Standing':
                    not_in_good_standing.append(row)
                elif email.lower() in member_emails:
                    # Email exists in member database (case-insensitive)
                    existing_members.append(row)
                else:
                    new_members.append(row)

        print(f"Processed {row_count} rows")
        print(f" - Not in good standing: {len(not_in_good_standing)}")
        print(f" - Existing members: {len(existing_members)}")
        print(f" - New members: {len(new_members)}")

    except FileNotFoundError:
        print(f"Error: File not found: {all_members_path}")
        return [], [], [], date_string
    except Exception as e:
        # Best-effort: report and return empty results rather than raising.
        print(f"Error reading {all_members_path}: {e}")
        return [], [], [], date_string

    return not_in_good_standing, existing_members, new_members, date_string
|
||||||
|
|
||||||
|
|
||||||
|
def write_csv_file(data, output_path, fieldnames):
    """
    Save row dictionaries to a CSV file, preceded by a header line.

    Any failure is reported on stdout instead of being raised.

    Args:
        data: List of dictionaries, one per output row
        output_path: Destination path of the CSV file
        fieldnames: Column names, in the order they are written
    """
    try:
        with open(output_path, 'w', encoding='utf-8', newline='') as out_file:
            csv_writer = csv.DictWriter(out_file, fieldnames=fieldnames)
            csv_writer.writeheader()
            for record in data:
                csv_writer.writerow(record)
        row_total = len(data)
        print(f" Written: {output_path} ({row_total} rows)")
    except Exception as err:
        print(f"Error writing {output_path}: {err}")
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv=None):
    """
    Main function to run the member classification script.

    Parses the command line, classifies the rows of the all-members file,
    and writes one CSV per category into a directory named after the date
    string returned by classify_members.

    Args:
        argv: Optional list of command-line arguments. When None (the
            default), argparse reads them from sys.argv.
    """
    parser = argparse.ArgumentParser(
        description='Classify members into not-in-good-standing, existing-member, and new-member categories'
    )
    parser.add_argument(
        '--all-members',
        default='AllMembersEmails-20FEB2026.csv',
        help='Path to the all members email CSV file (default: AllMembersEmails-20FEB2026.csv)'
    )
    parser.add_argument(
        '--member-db',
        default='nnjdsamembers.csv',
        help='Path to the member database CSV file (default: nnjdsamembers.csv)'
    )

    args = parser.parse_args(argv)

    print("=" * 60)
    print("Member Classification Script")
    print("=" * 60)
    print(f"All members file: {args.all_members}")
    print(f"Member database: {args.member_db}")
    print()

    # Classify members
    not_in_good_standing, existing_members, new_members, date_string = classify_members(
        args.all_members,
        args.member_db
    )

    if not date_string:
        print("Error: Could not determine output directory name")
        return

    # Get fieldnames from the first row of the first non-empty category
    # (all rows come from the same reader, so they share one structure).
    # This is checked before touching the filesystem so that a missing or
    # invalid input file does not leave an empty output directory behind.
    sample_rows = not_in_good_standing or existing_members or new_members
    if not sample_rows:
        print("Warning: No data to write")
        return
    fieldnames = list(sample_rows[0].keys())

    # Create output directory
    output_dir = Path(date_string)
    output_dir.mkdir(exist_ok=True)
    print(f"\nOutput directory: {output_dir}/")

    # Write output files, one per category
    print("\nWriting output files...")
    outputs = (
        (not_in_good_standing, "not-in-good-standing"),
        (existing_members, "existing-member"),
        (new_members, "new-member"),
    )
    for rows, category in outputs:
        write_csv_file(
            rows,
            output_dir / f"{date_string}_{category}.csv",
            fieldnames
        )

    print("\n" + "=" * 60)
    print("Classification complete!")
    print("=" * 60)
|
||||||
|
|
||||||
|
|
||||||
|
# Run the classification only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|
||||||
Reference in New Issue
Block a user