From 04b13e003d6af0de21e6c59e411ffee5b97b6134 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Sun, 1 Oct 2017 18:50:17 +0200 Subject: New upstream version 2.0.4 --- README.md | 89 +++++++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 58 insertions(+), 31 deletions(-) (limited to 'README.md') diff --git a/README.md b/README.md index 8e78da6..69718e7 100644 --- a/README.md +++ b/README.md @@ -19,20 +19,23 @@ Some examples: sites = [ - {'shortname': 'mywebsite1', - 'uri': 'http://www.mywebsite1.com/info', - 'contentcss': 'div'}, - - {'shortname': 'mywebsite2', - 'uri': 'http://www.mywebsite2.com/info', - 'contentxpath': '//*[contains(concat(\' \', normalize-space(@class), \' \'), \' news-list-container \')]', - 'titlexpath': '//title'}, - - {'shortname': 'mywebsite3', - 'uri': 'http://www.mywebsite3.com/info', - 'type': 'text', - 'contentregex': 'Version\"\:\d*\.\d*', - 'user-agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:49.0) Gecko/20100101 Firefox/49.0'} + {'name': 'example-css', + 'parsers': [uri(uri='https://github.com/mtill', contenttype='html'), + css(contentcss='div') + ] + }, + + {'name': 'example-xpath', + 'parsers': [uri(uri='https://example-webpage.com/test', contenttype='html'), + xpath(contentxpath='//div[contains(concat(\' \', normalize-space(@class), \' \'), \' package-version-header \')]') + ] + }, + + {'name': 'my-script', + 'parsers': [command(command='/home/user/script.sh', contenttype='text'), + regex(contentregex='^.*$') + ] + } ] @@ -40,31 +43,55 @@ sites = [ * parameters: - * shortname - short name of the entry, used as an identifier when sending email notifications + * name + name of the entry, used as an identifier when sending email notifications + * receiver (optional) + Overrides global receiver specification. 
+ + * parameters for the URL receiver: + * uri - URI of the website; If the scheme of the uri is 'cmd://', the string is interpreted as a command and the standard output (stdout) is parsed. - * type (optional; default: 'html') + URI of the website + * contenttype (optional; default: 'html') content type, e.g., 'xml'/'html'/'text'. - * contentxpath / titlexpath (optional) - XPath expression for the content/title sections to extract. If you prefer, you could use contentcss/titlecss instead. - * contentcss / titlecss (optional) - CSS expression for the content/title sections to extract. This is ignored if there is a corresponding XPath definition. - * contentregex / titleregex (optional) - Regular expression. If XPath/CSS selector is defined, the regular expression is applied afterwards. - * encoding (optional; default: 'utf-8') + * enc (optional; default: 'utf-8') Character encoding of the website, e.g., 'utf-8' or 'iso-8859-1'. - * splitregex (optional) - only works if type is set to 'text'; defines that content should be split to chunks based on the defined regex expression. - * receiver (optional) - Overrides global receiver specification. - * user-agent (optional) + * userAgent (optional) Defines the user agent string, e.g., - 'user-agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:49.0) Gecko/20100101 Firefox/49.0' + 'userAgent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:49.0) Gecko/20100101 Firefox/49.0' * accept (optional) Defines the accept string, e.g., 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' + * parameters for the Command receiver: + + * command + the command to execute; its standard output (stdout) is parsed + * contenttype (optional; default: 'text') + content type, e.g., 'xml'/'html'/'text'. + * enc (optional; default: 'utf-8') + Character encoding of the command output, e.g., 'utf-8' or 'iso-8859-1'. 
+ + * parameters for the XPath parser: + + * contentxpath + XPath expression for the content sections to extract + * titlexpath (optional) + XPath expression for the title sections to extract + + * parameters for the CSS parser: + + * contentcss + CSS expression for the content sections to extract + * titlecss (optional) + CSS expression for the title sections to extract + + * parameters for the RegEx parser: + + * contentregex + Regular expression for content parsing + * titleregex (optional) + Regular expression for title parsing * We collect some XPath/CSS snippets at this place: Snippet collection - please feel free to add your own definitions! -- cgit v1.2.3