# https://architizer.com/sitemap-firms.xml
# Step 1: Download the link above, save it as 'sitemap-firms.xml', and put it in the same directory as this Python script.
# Step 2: Run the Python script. It is hacked together and only partly works, but it covers the basics. You can filter
# the list further if you want.

import xml.etree.ElementTree as ET
import requests
import re

# Pattern for a ``mailto:`` link target.  Each part of the address stops at
# whitespace, quotes, angle brackets, '?' and '@' so that the surrounding HTML
# (closing quote, tag end, "?subject=..." query) is not swallowed into the match.
email_regex = r'''mailto:[^\s"'<>?@]+@[^\s"'<>?@]+\.[^\s"'<>?@]+'''

_EMAIL_PATTERN = re.compile(email_regex, re.IGNORECASE)
_MAILTO_PREFIX = 'mailto:'
_SITEMAP_PATH = 'sitemap-firms.xml'
# Seconds to wait for a single page before giving up on it.
_REQUEST_TIMEOUT = 10


def load_sitemap_urls(source=_SITEMAP_PATH):
    """Return the page URLs listed in a sitemap.

    Args:
        source: Path to the sitemap XML file, or an open file object
            (anything ``ET.parse`` accepts).

    Returns:
        A list with the stripped text of every ``<loc>`` child of each
        top-level entry, in document order.  Empty ``<loc>`` elements are
        skipped.
    """
    root = ET.parse(source).getroot()
    urls = []
    for url in root:
        for item in url:
            # Tags are namespace-qualified ('{namespace}loc'), so only the
            # end of the tag name is compared.
            if not item.tag.endswith('loc'):
                continue
            location = (item.text or '').strip()
            if location:
                urls.append(location)
    return urls


def extract_emails(html):
    """Return the e-mail addresses found in ``mailto:`` links in *html*.

    Args:
        html: Page content as text.

    Returns:
        A list of addresses with the ``mailto:`` prefix removed, in order of
        first appearance and without duplicates.  Empty when nothing matches.
    """
    emails = []
    for match in _EMAIL_PATTERN.findall(html):
        # cut the mailto part
        address = match[len(_MAILTO_PREFIX):]
        if address not in emails:
            emails.append(address)
    return emails


def main():
    """Fetch every URL from the sitemap and print ``url<TAB>email`` lines.

    Pages that fail to download or return an error status are reported on
    stderr and skipped, so one bad page does not abort the whole run.
    """
    import sys

    for url in load_sitemap_urls():
        try:
            rsp = requests.get(url, timeout=_REQUEST_TIMEOUT)
        except requests.RequestException as err:
            sys.stderr.write('{}: request failed ({})\n'.format(url, err))
            continue
        if not rsp.ok:
            sys.stderr.write('{}: HTTP {}\n'.format(url, rsp.status_code))
            continue
        # rsp.text is the decoded body; str(rsp.content) would search the
        # "b'...'" repr of the raw bytes instead.
        for email in extract_emails(rsp.text):
            print('{}\t{}'.format(url, email))


if __name__ == '__main__':
    main()

