# wasup kelso

# Home » wasup kelso

# https://architizer.com/sitemap-firms.xml
# Step 1: Download the above link to the file 'sitemap-firms.xml' and put it in the same directory as the python script. 
# Step 2: Run the python script. It's sorta hacked together so it only half works, but it should do the basics. You can filter
# the list more if you want. 

import xml.etree.ElementTree as ET
import requests
import re

# Pattern for a ``mailto:`` link. ``\S+`` is greedy, so a match runs from
# ``mailto:`` to the next whitespace and usually drags along the closing
# quote and trailing markup; extract_emails() trims that back to the address.
email_regex = r'mailto:\S+@\S+\.\S+'

# First character that can no longer belong to the address inside a matched
# ``mailto:`` run: a closing quote, the start of a ``?subject=`` query, markup,
# or a backslash escape.
_ADDRESS_END = re.compile(r'''["'?<>\\]''')


def collect_loc_urls(root):
    """Return the text of every ``loc`` grandchild of *root*.

    *root* is the parsed sitemap root element; its children are the per-page
    entries and each entry's ``loc`` child holds the page URL. The XML
    namespace prefix (``{...}loc``) is stripped before comparing, so only
    tags named exactly ``loc`` are accepted. Empty ``loc`` elements are
    skipped instead of yielding ``None``.
    """
    urls = []
    for entry in root:
        for item in entry:
            if item.tag.rsplit('}', 1)[-1] != 'loc':
                continue
            text = (item.text or '').strip()
            if text:
                urls.append(text)
    return urls


def load_sitemap_urls(path='sitemap-firms.xml'):
    """Parse the sitemap file at *path* and return its page URLs as a list."""
    return collect_loc_urls(ET.parse(path).getroot())


def extract_emails(html):
    """Return the addresses found in ``mailto:`` links in *html*.

    Addresses are returned in order of first appearance without duplicates.
    An empty list means the text contains no usable ``mailto:`` link.
    """
    addresses = []
    for match in re.findall(email_regex, html):
        # Drop the scheme, then cut at the first character that ends the
        # address (the greedy pattern over-matches up to the next whitespace).
        candidate = _ADDRESS_END.split(match[len('mailto:'):], maxsplit=1)[0]
        if '@' in candidate and candidate not in addresses:
            addresses.append(candidate)
    return addresses


def main():
    """Fetch every firm page listed in the sitemap and print its first e-mail.

    Pages that fail to download or that contain no ``mailto:`` link are
    reported on stderr and skipped, so one bad page does not abort the run.
    """
    import sys

    for url in load_sitemap_urls('sitemap-firms.xml'):
        try:
            # A timeout keeps one unresponsive host from hanging the run.
            rsp = requests.get(url, timeout=30)
            rsp.raise_for_status()
        except requests.RequestException as err:
            print('{}: request failed ({})'.format(url, err), file=sys.stderr)
            continue

        # Use the decoded body; str(rsp.content) would scan the b'...' repr.
        emails = extract_emails(rsp.text)
        if not emails:
            print('{}: no mailto link found'.format(url), file=sys.stderr)
            continue
        print(emails[0])


if __name__ == '__main__':
    main()

# Leave a Reply

# Your email address will not be published.