summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorRuben Rodriguez <ruben@gnu.org>2014-10-21 01:10:26 +0200
committerRuben Rodriguez <ruben@gnu.org>2014-10-21 01:10:26 +0200
commitc47f3dda10695dc1e71851e8cf7fdfb99d9d1c66 (patch)
tree2a44d64548885f28939d4f2fc17f9759849c3b57 /tools
parent14e6d0ed4be07a4d6bf94a141af83b7d60f1ac5f (diff)
Added tools dir
Diffstat (limited to 'tools')
-rw-r--r--tools/AddonsScraper.py169
-rw-r--r--tools/buildbinaries46
-rw-r--r--tools/gnupload54
3 files changed, 269 insertions, 0 deletions
diff --git a/tools/AddonsScraper.py b/tools/AddonsScraper.py
new file mode 100644
index 0000000..3813252
--- /dev/null
+++ b/tools/AddonsScraper.py
@@ -0,0 +1,169 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2014 Rubén Rodríguez <ruben@gnu.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+#
+#
+# This package parses https://addons.mozilla.org and generates a database
+# listing the addons that are under a valid license. addons.mozilla.org is
+# itself licensed as "CC Attribution Share-Alike v3.0 or any later version."
+# Note that screenshots get licensed under the same license as the program.
+#
+# To run, install mysql-server, python-mysqldb and python-beautifulsoup
+#
+
+
+import re
+import sys
+import urllib2
+import BeautifulSoup
+
+server="https://addons.mozilla.org"
+parsepages=2 #Number of pages per category to parse
+dbuser="root"
+dbpass=""
+
+validlicenses = ['http://www.gnu.org/licenses/gpl-3.0.html',
+'http://www.gnu.org/licenses/gpl-2.0.html',
+'http://www.gnu.org/licenses/lgpl-3.0.html',
+'http://www.gnu.org/licenses/lgpl-2.1.html',
+'http://www.opensource.org/licenses/bsd-license.php',
+'http://www.opensource.org/licenses/mit-license.php',
+'http://www.mozilla.org/MPL/MPL-1.1.html',
+'http://www.mozilla.org/MPL/2.0/']
+
+categories = ['alerts-updates', 'appearance', 'bookmarks', 'download-management', 'feeds-news-blogging', 'games-entertainment', 'language-support', 'photos-music-videos', 'privacy-security', 'search-tools', 'shopping', 'social-communication', 'tabs', 'web-development', 'other']
+
def normalink(string):
    """Return *string* with any query string ('?' and everything after) removed."""
    # Raw string for the regex: '\?' in a plain literal is an invalid escape
    # sequence (a warning, later an error, in newer Pythons).
    return re.sub(r'\?.*', '', string)
+
+def parselist(url):
+ print "PARSING LIST: " + url
+ l = []
+ request = urllib2.Request(url)
+ response = urllib2.urlopen(request)
+ soup = BeautifulSoup.BeautifulSoup(response)
+ for infodiv in soup.findAll('div',{'class':'info'}):
+ for h3 in infodiv.findAll('h3'):
+ for link in h3.findAll('a'):
+ l.append(re.sub('\?.*', '', link['href']))
+ return l
+
+
+def parsepage(url, category):
+ request = urllib2.Request(url)
+ response = urllib2.urlopen(request)
+ soup = BeautifulSoup.BeautifulSoup(response)
+ try:
+ licenseli = soup.findAll('li',{'class':'source-license'})[0]
+ license = licenseli.findAll('a')[0]['href']
+ if license not in validlicenses:
+ if license[0] == "h":
+ print "INVALID LICENSE: " + license
+ return 0
+ except:
+ return 0
+ name = re.sub('/$','', normalink(url))
+ name = re.sub('.*/','', name)
+ prettyname = soup.findAll(attrs={"property":"og:title"})[0]['content']
+ description = soup.findAll(attrs={"property":"og:description"})[0]['content']
+ rating = soup.findAll(attrs={"itemprop":"ratingValue"})[0]['content']
+ popularity = soup.findAll(attrs={"itemprop":"interactionCount"})[0]['content']
+ popularity = re.sub('UserDownloads:', '', popularity)
+ htmldescription = soup.findAll('div',{'id':'addon-description'})[0]
+ icon = normalink(soup.findAll(attrs={"property":"og:image"})[0]['content'])
+ screenshots = []
+ try:
+ previewdiv = soup.findAll('ul',{'id':'preview'})[0]
+ for a in previewdiv.findAll('a'):
+ screenshots.append(normalink(a['href']))
+ except:
+ pass
+ version = soup.findAll('span',{'class':'version-number'})[0].text
+ addondiv = soup.findAll('div',{'id':'addon'})[0]
+ addonp = addondiv.findAll('p',{'class':'install-button'})[0]
+ button = addonp.findAll('a')[0]
+ downloadlink = server + normalink(button['href'])
+ try:
+ homelink = soup.findAll('a',{'class':'home'})[1]['href']
+ homelink = re.sub('.*//','http://',homelink)
+ except:
+ homelink = ""
+ try:
+ supportlink = soup.findAll('a',{'class':'support'})[0]['href']
+ supportlink = re.sub('.*//','http://',supportlink)
+ except:
+ supportlink = ""
+
+ htmldescription = unicode(htmldescription)
+ description = unicode(description)
+ prettyname = unicode(prettyname)
+ description = re.sub('\'', '\\\'', description)
+ htmldescription = re.sub('\'', '\\\'', htmldescription)
+ prettyname = re.sub('\'', '\\\'', prettyname)
+ screenshots = unicode(screenshots)
+
+ sql = u'INSERT INTO addons.addons (`name`, `prettyname`, `description`, `htmldescription`, `icon`, `screenshots`, `version`, `rating`, `popularity`, `downloadlink`, `homelink`, `supportlink`, `retrievedlink`, `license`, `category`) VALUES ("' +name+ '", \'' +prettyname+ '\', \'' +description+ '\', \'' +htmldescription+ '\', "' +icon+ '", "' +screenshots+ '", "' +version+ '", "' +rating+ '", "' +popularity+ '", "' +downloadlink+ '", "' +homelink+ '", "' +supportlink+ '", "' +url+ '" , "' +license+ '", "' +category+ '");'
+ try:
+ cursor = db.cursor()
+ cursor.execute(sql)
+ cursor.close()
+ db.commit()
+ except:
+ print 'Failed to insert "' + name + '", query: "' + sql
+ else:
+ print "Added " + name + " " + url
+
import MySQLdb
db = MySQLdb.connect(host="localhost",user=dbuser,passwd=dbpass, charset="utf8", use_unicode=True)
cursor = db.cursor()
# (Re)create the addons database and its single table from scratch.
# MySQLdb runs ONE statement per execute() call: handing it the whole
# script as a single multi-statement string fails unless the
# MULTI_STATEMENTS client flag is set, so issue each statement separately.
schema = (
    "DROP DATABASE IF EXISTS addons",
    "CREATE DATABASE addons CHARACTER SET utf8 COLLATE utf8_general_ci",
    "USE addons",
    """CREATE TABLE addons(
id INT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(50),
prettyname VARCHAR(50),
description TEXT,
htmldescription TEXT,
icon VARCHAR(255),
screenshots TEXT,
version VARCHAR(20),
rating VARCHAR(10),
popularity INT,
downloadlink VARCHAR(255),
homelink VARCHAR(255),
supportlink VARCHAR(255),
retrievedlink VARCHAR(255),
license VARCHAR(255),
category VARCHAR(20)
)""",
)
for sql in schema:
    cursor.execute(sql)
cursor.close()
db.commit()
+
# Crawl every category: gather the addon links from the first `parsepages`
# listing pages (sorted by popularity), then scrape each addon page.
for category in categories:
    links = []
    for page in range(1, 1 + parsepages):
        listurl = server + "/en-US/firefox/extensions/" + category + "/?sort=popular&page=" + str(page)
        links.extend(parselist(listurl))
    for link in links:
        parsepage(server + link, category)

#tests
#parsepage("https://addons.mozilla.org/en-US/firefox/addon/twoo/", "test")
#parsepage("https://addons.mozilla.org/en-US/firefox/addon/what-about/", "test")
diff --git a/tools/buildbinaries b/tools/buildbinaries
new file mode 100644
index 0000000..0301cd7
--- /dev/null
+++ b/tools/buildbinaries
@@ -0,0 +1,46 @@
#!/bin/bash
#
# Build IceCat tarballs and language packs for each architecture inside a
# chroot jail, collecting the results into ./binaries.
# Must run as root: it mounts proc/tmpfs inside the jails and chroots.

set -e

VERSION=31.2.0
# Jail path prefix; "-$ARCH" is appended per architecture below.
JAILDIR="/home/systems/gnuzilla/jails/toutatis"
ARCHS="i386 amd64"

# Start from a clean output directory.
rm binaries -rf
mkdir binaries

for ARCH in $ARCHS; do
    # Mount /proc in the jail (if it is not mounted yet) and a 20G tmpfs
    # over its /root to build in, then copy the source tree into it.
    [ -f $JAILDIR-$ARCH/proc/cpuinfo ] || mount -t proc none $JAILDIR-$ARCH/proc
    mount -t tmpfs -o size=20G none $JAILDIR-$ARCH/root/
    cp icecat-$VERSION -a $JAILDIR-$ARCH/root/

    # Write the build script executed inside the jail. The heredoc is
    # unquoted, so $VERSION expands now while \$PWD/\$locale expand at
    # build time inside the chroot.
    cat << EOF > $JAILDIR-$ARCH/root/buildscript
set -e
set -x

export LANG=C
cd /root/icecat-$VERSION
mkdir temp
cd temp

../configure --with-l10n-base=\$PWD/../l10n --enable-official-branding --disable-crashreporter --disable-gnomevfs --enable-gio --disable-debug --enable-gstreamer=0.10 --with-distribution-id=org.gnu --disable-updater
make -j8

cd browser/installer
make

cd ../locales
for locale in \$(ls ../../../l10n/ -1); do
make langpack-\$locale LOCALE_MERGEDIR=.
done
EOF

    # Run the build, then harvest the tarballs; the langpack xpis are the
    # same for every arch, so they are only copied from the i386 build.
    chroot $JAILDIR-$ARCH /bin/bash /root/buildscript
    cp $JAILDIR-$ARCH/root/icecat-$VERSION/temp/dist/icecat*.tar.bz2 binaries
    [ $ARCH = i386 ] && cp $JAILDIR-$ARCH/root/icecat-$VERSION/temp/dist/linux-*/xpi/ -a binaries/langpacks

    # Best-effort cleanup of the jail mounts.
    umount $JAILDIR-$ARCH/root/ || true
    umount $JAILDIR-$ARCH/proc || true

done
+
diff --git a/tools/gnupload b/tools/gnupload
new file mode 100644
index 0000000..ca20b6e
--- /dev/null
+++ b/tools/gnupload
@@ -0,0 +1,54 @@
#!/bin/bash
#
# Sign the IceCat tarballs and langpacks under ./binaries with GPG and
# upload them, together with their GNU ftp-upload "directive" files, to
# ftp-upload.gnu.org via anonymous ftp.

# Refuse to run without the binaries/ directory (produced by buildbinaries).
[ -d binaries ] || echo Binaries dir not found, exiting
[ -d binaries ] || exit 1

# Remove signature files (.asc/.sig) left over from a previous run.
find binaries -type f | grep -e asc$ -e sig$ | xargs -r rm

set -e

VERSION=31.2.0
newline="
"
# Accumulates one ftp "put" command per line, fed to the heredoc below.
mputs=""

# Detached-sign each tarball and clearsign an upload directive for it.
for file in $(ls binaries/|grep icecat); do

    gpg -b --default-key D7E04784 binaries/$file
    echo "version: 1.2
filename: $file
directory: gnuzilla/$VERSION
" > binaries/$file.directive
gpg --default-key D7E04784 --clearsign binaries/$file.directive
rm binaries/$file.directive
mputs="${mputs}put binaries/$file $file $newline"
mputs="${mputs}put binaries/$file.sig $file.sig $newline"
mputs="${mputs}put binaries/$file.directive.asc $file.directive.asc $newline"

done


# Same for the language packs, targeting the langpacks/ subdirectory.
for file in $(ls binaries/langpacks); do

    gpg -b --default-key D7E04784 binaries/langpacks/$file
    echo "version: 1.2
filename: $file
directory: gnuzilla/$VERSION/langpacks
" > binaries/langpacks/$file.directive
gpg --default-key D7E04784 --clearsign binaries/langpacks/$file.directive
rm binaries/langpacks/$file.directive
mputs="${mputs}put binaries/langpacks/$file $file $newline"
mputs="${mputs}put binaries/langpacks/$file.sig $file.sig $newline"
mputs="${mputs}put binaries/langpacks/$file.directive.asc $file.directive.asc $newline"

done

# Upload everything in one anonymous ftp session; swap in the commented
# "cat" line to dry-run the generated command list instead.
#cat << EOF
ftp -v -p -n -i << EOF | tee log
open ftp-upload.gnu.org
user anonymous anonymous
cd /incoming/alpha
$mputs
quit
EOF
+