From 90f1e629ea6a339f3b113df81bf66ace48aec7c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?N=C3=A9munaire?= <nemunaire@pomail.fr>
Date: Fri, 19 Oct 2012 18:41:17 +0200
Subject: [PATCH] Start a tool to manage web requests

---
 tools/web.py | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 tools/web.py

diff --git a/tools/web.py b/tools/web.py
new file mode 100644
index 0000000..5f3f4d3
--- /dev/null
+++ b/tools/web.py
@@ -0,0 +1,77 @@
+# coding=utf-8
+
+# Nemubot is a modulable IRC bot, built around XML configuration files.
+# Copyright (C) 2012  Mercier Pierre-Olivier
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import http.client
+import re
+from urllib.parse import quote
+
+def parseURL(url):
+    """Separate protocol, domain, port and page request"""
+    res = re.match("^(([^:]+)://)?([^:/]+)(:([0-9]{1,5}))?(.*)$", url)
+    if res is not None:
+        port = res.group(5)
+        if port is None and res.group(2) is not None:
+            if res.group(2) == "http":
+                port = 80
+            elif res.group(2) == "https":
+                port = 443
+        return (res.group(2), res.group(3), port, res.group(6))
+    else:
+        return (None, None, None, None)
+
+def getDomain(url):
+    """Return the domain of a given URL"""
+    (protocol, domain, port, page) = parseURL(url)
+    return domain
+
+def getProtocol(url):
+    """Return the domain of a given URL"""
+    (protocol, domain, port, page) = parseURL(url)
+    return protocol
+
+def getURL(url):
+    """Return page content corresponding to URL or None if any error occurs"""
+    conn = http.client.HTTPConnection("api.duckduckgo.com", timeout=5)
+    try:
+        conn.request("GET", "/?q=%s&format=xml" % quote(terms))
+    except socket.gaierror:
+        print ("impossible de récupérer la page %s."%(p))
+        return (http.client.INTERNAL_SERVER_ERROR, None)
+
+    res = conn.getresponse()
+    data = res.read()
+
+    conn.close()
+    return (res.status, data)
+
+if __name__ == "__main__":
+  content1 = ""
+  with open("rss.php.1", "r") as f:
+    for line in f:
+      content1 += line
+  content2 = ""
+  with open("rss.php", "r") as f:
+    for line in f:
+      content2 += line
+  a = Atom (content1)
+  print (a.updated)
+  b = Atom (content2)
+  print (b.updated)
+
+  diff = a.diff (b)
+  print (diff)