diff options
author | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2017-08-06 20:18:23 +0200 |
---|---|---|
committer | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2017-08-06 20:18:23 +0200 |
commit | 2cf54f179c9411b11dabe00f5c831245fbb7a622 (patch) | |
tree | cbdbcbbbee31a3e71153b2eddf432f99fdad2a29 | |
parent | 18449d7dd31123e14f8a0d87047f2f85187a156b (diff) | |
parent | 6c03e9d2fa808b9c5a223c4d01f4d0b848fe97f1 (diff) |
Merge branch 'feature/upstream' into develop
-rw-r--r-- | .bzrignore | 3 | ||||
-rw-r--r-- | .gitignore | 3 | ||||
-rw-r--r-- | README.md | 3 | ||||
-rw-r--r-- | config_template.py | 3 | ||||
-rw-r--r-- | debian/README.source | 18 | ||||
-rw-r--r-- | debian/changelog | 7 | ||||
-rw-r--r-- | debian/control | 2 | ||||
-rwxr-xr-x | mwc.py | 23 |
8 files changed, 48 insertions, 14 deletions
diff --git a/.bzrignore b/.bzrignore deleted file mode 100644 index 2386f62..0000000 --- a/.bzrignore +++ /dev/null @@ -1,3 +0,0 @@ -.git -**/.git -**/.pc @@ -4,6 +4,5 @@ /*.txt *~ *.pyc -.bzrignore -.bzr .pc + @@ -54,6 +54,8 @@ sites = [ Regular expression. If XPath/CSS selector is defined, the regular expression is applied afterwards. * <b>encoding</b> (optional; default: 'utf-8') Character encoding of the website, e.g., 'utf-8' or 'iso-8859-1'. + * <b>splitregex</b> (optional) + only works if type is set to 'text'; defines that content should be split to chunks based on the defined regex expression. * <b>receiver</b> (optional) Overrides global receiver specification. * <b>user-agent</b> (optional) @@ -79,6 +81,7 @@ sites = [ <pre> <code> enableMailNotifications = True #enable/disable notification messages; if set to False, only send error messages +maxMailsPerSession = -1 #max. number of mails to send per session; ignored when set to -1 subjectPostfix = 'A website has been updated!' sender = 'me@mymail.com' diff --git a/config_template.py b/config_template.py index 02f7579..f394e52 100644 --- a/config_template.py +++ b/config_template.py @@ -15,7 +15,7 @@ sites = [ 'titleregex': '', 'contentregex': '', 'user-agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:49.0) Gecko/20100101 Firefox/49.0', - 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', + 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' 'encoding': 'utf-8'}, {'shortname': 'mywebsite2', @@ -42,6 +42,7 @@ sites = [ subjectPostfix = 'A website has been updated!' enableMailNotifications = True +maxMailsPerSession = -1 sender = 'me@mymail.com' smtphost = 'mysmtpprovider.com' useTLS = True diff --git a/debian/README.source b/debian/README.source new file mode 100644 index 0000000..e4f2b3d --- /dev/null +++ b/debian/README.source @@ -0,0 +1,18 @@ +Hello, + +now I use the branching model from Vincent Driessen[1]. + +I use the gitflow-avh[2]. with the Documentation[3]. +The Debian package can be found here[4]. + +Please upload unattended uploads use a branch feature/<your title>. + + +Many thanks. + + -- Jörg Frings-Fürst <debian@jff-webhosting.net> Fri, 02 Jun 2017 19:00:40 +0200 + +[1] http://nvie.com/posts/a-successful-git-branching-model/ +[2] https://github.com/petervanderdoes/gitflow-avh +[3] https://github.com/petervanderdoes/gitflow-avh/wiki +[4] https://tracker.debian.org/pkg/git-flow diff --git a/debian/changelog b/debian/changelog index 48ee0e8..18767a8 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,19 +1,20 @@ -mwc (1.7.5-1) UNRELEASED; urgency=medium +mwc (1.7.6-1) UNRELEASED; urgency=medium * New upstream release. * Renumbering patches. * debian/patches/0105-try_mail.diff: - Replace undefined printf with print (Closes: #860494). * Rewrite debian/watch for archives without "v" in front of the version. - * Bump Standards-Version to 3.9.8. + * Declare compliance with Debian Policy 4.0.0. (No changes needed). * Bump compatlevel to 10 (no changes required): - Change debian/compat to 10. - At debian/control change requested version of debhelper to >= 10. * At debian/control change Vcs-Browser to secure URI. * debian/copyright: - Refresh copyright year at * and debian/*. + * New README.source to explain the branching model used. - -- Jörg Frings-Fürst <debian@jff-webhosting.net> Tue, 18 Apr 2017 11:06:04 +0200 + -- Jörg Frings-Fürst <debian@jff-webhosting.net> Sun, 06 Aug 2017 19:52:54 +0200 mwc (1.7.2-3) unstable; urgency=medium diff --git a/debian/control b/debian/control index 70dd2d3..f38df7d 100644 --- a/debian/control +++ b/debian/control @@ -6,7 +6,7 @@ Build-Depends: debhelper (>= 10), dh-python, python3-all -Standards-Version: 3.9.8 +Standards-Version: 4.0.0 Homepage: https://github.com/Debianguru/MailWebsiteChanges Vcs-Git: git://anonscm.debian.org/collab-maint/mwc.git Vcs-Browser: https://anonscm.debian.org/cgit/collab-maint/mwc.git @@ -69,6 +69,7 @@ def parseSite(site): contenttype = site.get('type', 'html') contentregex = site.get('contentregex', '') titleregex = site.get('titleregex', '') + splitregex = site.get('splitregex', '') enc = site.get('encoding', defaultEncoding) contentxpath = site.get('contentxpath', '') @@ -96,7 +97,10 @@ def parseSite(site): if contenttype == 'text' or (contentxpath == '' and titlexpath == ''): - contents = [file.read().decode(enc)] + thefullcontent = file.read().decode(enc) + contents = [thefullcontent] + if splitregex != '': + contents = thefullcontent.split(splitregex) titles = [] else: baseuri = uri @@ -248,13 +252,13 @@ def getFileContents(shortname): # updates list of content that is stored locally for a specific site -def storeFileContents(shortname, parseResult): +def storeFileContents(shortname, contents): for f in os.listdir('.'): if f.startswith(shortname + '.') and f.endswith('.txt'): os.remove(f) i = 0 - for c in parseResult['contents']: + for c in contents: file = open(shortname + '.' + str(i) + '.txt', 'wb') file.write(c.encode('utf-8')) file.close() @@ -271,7 +275,11 @@ def pollWebsites(): feedXML = etree.parse(io.StringIO(emptyfeed)) # start polling sites + sessionContents = [] + mailsSent = 0 for site in config.sites: + if config.maxMailsPerSession != -1 and mailsSent >= config.maxMailsPerSession: + break print('polling site [' + site['shortname'] + '] ...') parseResult = parseSite(site) @@ -283,6 +291,7 @@ def pollWebsites(): print('WARNING: ' + parseResult['warning']) if config.enableMailNotifications: sendmail(receiver, subject, parseResult['warning'], False, None) + mailsSent = mailsSent + 1 if config.enableRSSFeed: feedXML.xpath('//channel')[0].append(genFeedItem(subject, parseResult['warning'], site['uri'], 0)) else: @@ -291,13 +300,18 @@ def pollWebsites(): fileContents = getFileContents(site['shortname']) i = 0 for content in parseResult['contents']: + if config.maxMailsPerSession != -1 and mailsSent >= config.maxMailsPerSession: + break + if content not in fileContents: changes += 1 + sessionContents.append(content) subject = '[' + site['shortname'] + '] ' + parseResult['titles'][i] print(' ' + subject) if config.enableMailNotifications and len(fileContents) > 0: sendmail(receiver, subject, content, (site.get('type', 'html') == 'html'), site['uri']) + mailsSent = mailsSent + 1 if config.enableRSSFeed: feedXML.xpath('//channel')[0].append(genFeedItem(subject, content, site['uri'], changes)) @@ -305,7 +319,7 @@ def pollWebsites(): if changes > 0: - storeFileContents(site['shortname'], parseResult) + storeFileContents(site['shortname'], sessionContents) print(' ' + str(changes) + ' updates') # store feed @@ -343,6 +357,7 @@ if __name__ == "__main__": if site['shortname'] == dryrun: parseResult = parseSite(site) print(parseResult) + print(str(len(parseResult['contents'])) + " results") break else: try: |