From 76ea31d1747d8d95ec7ac75be750176beb452f66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Sun, 6 Aug 2017 19:52:14 +0200 Subject: New upstream version 1.7.6 --- mwc.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) (limited to 'mwc.py') diff --git a/mwc.py b/mwc.py index a0635a1..c420a74 100755 --- a/mwc.py +++ b/mwc.py @@ -69,6 +69,7 @@ def parseSite(site): contenttype = site.get('type', 'html') contentregex = site.get('contentregex', '') titleregex = site.get('titleregex', '') + splitregex = site.get('splitregex', '') enc = site.get('encoding', defaultEncoding) contentxpath = site.get('contentxpath', '') @@ -96,7 +97,10 @@ def parseSite(site): if contenttype == 'text' or (contentxpath == '' and titlexpath == ''): - contents = [file.read().decode(enc)] + thefullcontent = file.read().decode(enc) + contents = [thefullcontent] + if splitregex != '': + contents = thefullcontent.split(splitregex) titles = [] else: baseuri = uri @@ -248,13 +252,13 @@ def getFileContents(shortname): # updates list of content that is stored locally for a specific site -def storeFileContents(shortname, parseResult): +def storeFileContents(shortname, contents): for f in os.listdir('.'): if f.startswith(shortname + '.') and f.endswith('.txt'): os.remove(f) i = 0 - for c in parseResult['contents']: + for c in contents: file = open(shortname + '.' + str(i) + '.txt', 'wb') file.write(c.encode('utf-8')) file.close() @@ -271,7 +275,11 @@ def pollWebsites(): feedXML = etree.parse(io.StringIO(emptyfeed)) # start polling sites + sessionContents = [] + mailsSent = 0 for site in config.sites: + if config.maxMailsPerSession != -1 and mailsSent >= config.maxMailsPerSession: + break print('polling site [' + site['shortname'] + '] ...') parseResult = parseSite(site) @@ -283,6 +291,7 @@ def pollWebsites(): print('WARNING: ' + parseResult['warning']) if config.enableMailNotifications: sendmail(receiver, subject, parseResult['warning'], False, None) + mailsSent = mailsSent + 1 if config.enableRSSFeed: feedXML.xpath('//channel')[0].append(genFeedItem(subject, parseResult['warning'], site['uri'], 0)) else: @@ -291,13 +300,18 @@ def pollWebsites(): fileContents = getFileContents(site['shortname']) i = 0 for content in parseResult['contents']: + if config.maxMailsPerSession != -1 and mailsSent >= config.maxMailsPerSession: + break + if content not in fileContents: changes += 1 + sessionContents.append(content) subject = '[' + site['shortname'] + '] ' + parseResult['titles'][i] print(' ' + subject) if config.enableMailNotifications and len(fileContents) > 0: sendmail(receiver, subject, content, (site.get('type', 'html') == 'html'), site['uri']) + mailsSent = mailsSent + 1 if config.enableRSSFeed: feedXML.xpath('//channel')[0].append(genFeedItem(subject, content, site['uri'], changes)) @@ -305,7 +319,7 @@ def pollWebsites(): if changes > 0: - storeFileContents(site['shortname'], parseResult) + storeFileContents(site['shortname'], sessionContents) print(' ' + str(changes) + ' updates') # store feed @@ -343,6 +357,7 @@ if __name__ == "__main__": if site['shortname'] == dryrun: parseResult = parseSite(site) print(parseResult) + print(str(len(parseResult['contents'])) + " results") break else: try: -- cgit v1.2.3