diff options
author | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2017-08-06 19:52:15 +0200 |
---|---|---|
committer | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2017-08-06 19:52:15 +0200 |
commit | b58bc019333ec144207dec72f8c55dc5ebfb7d95 (patch) | |
tree | 173380ecba00a98fa599f6f84f23b0f18449c2d5 /mwc.py | |
parent | 18449d7dd31123e14f8a0d87047f2f85187a156b (diff) | |
parent | 76ea31d1747d8d95ec7ac75be750176beb452f66 (diff) |
Updated version 1.7.6 from 'upstream/1.7.6'
with Debian dir e90897c9a496daa7e8acb59866cfd7c2f63409a9
Diffstat (limited to 'mwc.py')
-rwxr-xr-x | mwc.py | 23 |
1 file changed, 19 insertions, 4 deletions
@@ -69,6 +69,7 @@ def parseSite(site): contenttype = site.get('type', 'html') contentregex = site.get('contentregex', '') titleregex = site.get('titleregex', '') + splitregex = site.get('splitregex', '') enc = site.get('encoding', defaultEncoding) contentxpath = site.get('contentxpath', '') @@ -96,7 +97,10 @@ def parseSite(site): if contenttype == 'text' or (contentxpath == '' and titlexpath == ''): - contents = [file.read().decode(enc)] + thefullcontent = file.read().decode(enc) + contents = [thefullcontent] + if splitregex != '': + contents = thefullcontent.split(splitregex) titles = [] else: baseuri = uri @@ -248,13 +252,13 @@ def getFileContents(shortname): # updates list of content that is stored locally for a specific site -def storeFileContents(shortname, parseResult): +def storeFileContents(shortname, contents): for f in os.listdir('.'): if f.startswith(shortname + '.') and f.endswith('.txt'): os.remove(f) i = 0 - for c in parseResult['contents']: + for c in contents: file = open(shortname + '.' 
+ str(i) + '.txt', 'wb') file.write(c.encode('utf-8')) file.close() @@ -271,7 +275,11 @@ def pollWebsites(): feedXML = etree.parse(io.StringIO(emptyfeed)) # start polling sites + sessionContents = [] + mailsSent = 0 for site in config.sites: + if config.maxMailsPerSession != -1 and mailsSent >= config.maxMailsPerSession: + break print('polling site [' + site['shortname'] + '] ...') parseResult = parseSite(site) @@ -283,6 +291,7 @@ def pollWebsites(): print('WARNING: ' + parseResult['warning']) if config.enableMailNotifications: sendmail(receiver, subject, parseResult['warning'], False, None) + mailsSent = mailsSent + 1 if config.enableRSSFeed: feedXML.xpath('//channel')[0].append(genFeedItem(subject, parseResult['warning'], site['uri'], 0)) else: @@ -291,13 +300,18 @@ def pollWebsites(): fileContents = getFileContents(site['shortname']) i = 0 for content in parseResult['contents']: + if config.maxMailsPerSession != -1 and mailsSent >= config.maxMailsPerSession: + break + if content not in fileContents: changes += 1 + sessionContents.append(content) subject = '[' + site['shortname'] + '] ' + parseResult['titles'][i] print(' ' + subject) if config.enableMailNotifications and len(fileContents) > 0: sendmail(receiver, subject, content, (site.get('type', 'html') == 'html'), site['uri']) + mailsSent = mailsSent + 1 if config.enableRSSFeed: feedXML.xpath('//channel')[0].append(genFeedItem(subject, content, site['uri'], changes)) @@ -305,7 +319,7 @@ def pollWebsites(): if changes > 0: - storeFileContents(site['shortname'], parseResult) + storeFileContents(site['shortname'], sessionContents) print(' ' + str(changes) + ' updates') # store feed @@ -343,6 +357,7 @@ if __name__ == "__main__": if site['shortname'] == dryrun: parseResult = parseSite(site) print(parseResult) + print(str(len(parseResult['contents'])) + " results") break else: try: |