parse-report.py

Home   »   parse-report.py

import fileinput
import re
import sys
from json import dumps

# Matches lines that carry only a page counter, e.g. "Page,3,of,12".
page_regex = re.compile(r'^Page,[0-9]+,of,[0-9]+')


def parse_input():
    """Yield report records, each built from two consecutive input lines.

    Input is read through ``fileinput.input()``: the files named on the
    command line, or standard input when none are given.  The very first
    line is treated as the page header; it is skipped, and so is every
    later line identical to it.  Lines matching ``page_regex`` are skipped
    as well.

    The remaining lines are right-stripped and joined in pairs with a
    comma.  A trailing line without a partner is dropped.

    Yields:
        str: two consecutive data lines joined as ``"first,second"``.
    """
    with fileinput.input() as stream:
        # Read the header from the same stream as the data so that file
        # arguments and piped stdin behave the same way.
        page_header = next(stream, '')

        lines_to_join = []

        for line in stream:
            # skip lines that only give a page number
            if page_regex.match(line):
                continue

            # the first line is repeated several times
            if line == page_header:
                continue

            lines_to_join.append(line.rstrip())

            # Emit as soon as the pair is complete; waiting for the next
            # line would lose the final record of the input.
            if len(lines_to_join) == 2:
                yield ','.join(lines_to_join)
                lines_to_join = []

# Column-header row as it comes out of parse_input().  The "Desciption"
# spelling is intentional here: it is compared verbatim against the input
# (presumably the report itself misspells it -- do not "fix" it).
HEADER_LINE = 'Site Name,Technician,Desciption,When Started,Duration (in minutes),Service Date,When Ended'

# Output keys, in the order the comma-separated values appear in a record.
FIELD_NAMES = (
    'site_name',
    'technician',
    'description',
    'when_started',
    'duration',
    'service_date',
    'when_ended',
)


def record_from_line(line):
    """Convert one comma-separated record line into a dict.

    Args:
        line: a record string with exactly ``len(FIELD_NAMES)`` values
            separated by commas.

    Returns:
        A dict keyed by ``FIELD_NAMES`` with ``duration`` converted to
        ``int``, or ``None`` when the line has the wrong number of values
        or its duration is not an integer.
    """
    values = line.split(',')
    if len(values) != len(FIELD_NAMES):
        return None

    record = dict(zip(FIELD_NAMES, values))
    try:
        record['duration'] = int(record['duration'])
    except ValueError:
        # A malformed duration is treated like any other bad line rather
        # than aborting the whole run.
        return None
    return record


def main():
    """Print each record from parse_input() as one JSON object per line.

    Header rows are skipped silently; lines that cannot be converted are
    reported as ``bad line`` on stderr and skipped.
    """
    for line in parse_input():
        if line == HEADER_LINE:
            # skip headers
            continue

        record = record_from_line(line)
        if record is None:
            print('bad line', file=sys.stderr)
            continue

        print(dumps(record))


if __name__ == '__main__':
    main()

Leave a Reply

Your email address will not be published.