From 90f1e629ea6a339f3b113df81bf66ace48aec7c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?N=C3=A9munaire?= Date: Fri, 19 Oct 2012 18:41:17 +0200 Subject: [PATCH] Start a tool to manage web requests --- tools/web.py | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 tools/web.py diff --git a/tools/web.py b/tools/web.py new file mode 100644 index 0000000..5f3f4d3 --- /dev/null +++ b/tools/web.py @@ -0,0 +1,77 @@ +# coding=utf-8 + +# Nemubot is a modulable IRC bot, built around XML configuration files. +# Copyright (C) 2012 Mercier Pierre-Olivier +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>.
import http.client
import re
from urllib.parse import quote

# Default TCP port for each protocol this module knows how to speak.
_DEFAULT_PORTS = {"http": 80, "https": 443}

# protocol (optional) / domain / port (optional) / page request.
# The protocol is restricted to URI-scheme characters so that a "://" that
# appears later in the URL (e.g. inside a query string) is not mistaken for
# the protocol separator.
_URL_RE = re.compile(
    r"^(?:([a-zA-Z][a-zA-Z0-9+.-]*)://)?"
    r"([^:/]+)"
    r"(?::([0-9]{1,5}))?"
    r"(.*)$"
)

# Characters left untouched when escaping the page request: URL delimiters
# and "%" (so that already percent-encoded sequences are not encoded twice).
_PAGE_SAFE_CHARS = "/?#&=%:;,+@!$'()*[]"


def parseURL(url):
    """Separate protocol, domain, port and page request.

    Returns a ``(protocol, domain, port, page)`` tuple:

    * ``protocol`` is the text before ``://`` or None when absent;
    * ``domain`` is the host name;
    * ``port`` is always an int when known: the explicit port if the URL
      carries one, otherwise the default port of a known protocol (http,
      https, compared case-insensitively), otherwise None;
    * ``page`` is everything after the domain/port (possibly empty).

    When the URL cannot be parsed, every element of the tuple is None.
    """
    res = _URL_RE.match(url)
    if res is None:
        return (None, None, None, None)

    protocol, domain, port, page = res.groups()
    if port is not None:
        # Normalise to int so explicit and default ports share one type.
        port = int(port)
    elif protocol is not None:
        port = _DEFAULT_PORTS.get(protocol.lower())
    return (protocol, domain, port, page)


def getDomain(url):
    """Return the domain of a given URL (None if the URL is not parsable)"""
    return parseURL(url)[1]


def getProtocol(url):
    """Return the protocol of a given URL (None if the URL has none)"""
    return parseURL(url)[0]


def getURL(url):
    """Fetch the page designated by URL with a GET request.

    Returns a ``(status, data)`` tuple where ``status`` is the HTTP status
    code and ``data`` the raw response body (bytes).  If the URL is not
    usable (unparsable, unsupported protocol, port out of range) or if any
    network/HTTP error occurs, a message is printed and
    ``(http.client.INTERNAL_SERVER_ERROR, None)`` is returned.

    A URL without protocol is fetched over plain HTTP.
    """
    failure = (http.client.INTERNAL_SERVER_ERROR, None)

    (protocol, domain, port, page) = parseURL(url)
    scheme = "http" if protocol is None else protocol.lower()
    if (domain is None or scheme not in _DEFAULT_PORTS
            or (port is not None and not 0 < port <= 65535)):
        print("impossible de récupérer la page %s." % url)
        return failure

    if scheme == "https":
        conn = http.client.HTTPSConnection(domain, port, timeout=5)
    else:
        conn = http.client.HTTPConnection(domain, port, timeout=5)

    try:
        # An empty page request means the root of the site.
        conn.request("GET", quote(page, safe=_PAGE_SAFE_CHARS) or "/")
        res = conn.getresponse()
        data = res.read()
    except (OSError, http.client.HTTPException, UnicodeError):
        # OSError covers DNS failures (socket.gaierror), timeouts and
        # refused connections; UnicodeError covers non-encodable host names.
        print("impossible de récupérer la page %s." % url)
        return failure
    finally:
        # Always release the socket, including on the error path.
        conn.close()

    return (res.status, data)


if __name__ == "__main__":
    # Small manual check: print how each URL given on the command line is
    # split.  (The previous demo compared Atom feeds through an ``Atom``
    # class that this module neither defines nor imports.)
    import sys

    for arg in sys.argv[1:]:
        print(parseURL(arg))