diff options
author | Ruben Rodriguez <ruben@gnu.org> | 2014-10-21 01:10:26 +0200 |
---|---|---|
committer | Ruben Rodriguez <ruben@gnu.org> | 2014-10-21 01:10:26 +0200 |
commit | c47f3dda10695dc1e71851e8cf7fdfb99d9d1c66 (patch) | |
tree | 2a44d64548885f28939d4f2fc17f9759849c3b57 /tools | |
parent | 14e6d0ed4be07a4d6bf94a141af83b7d60f1ac5f (diff) |
Added tools dir
Diffstat (limited to 'tools')
-rw-r--r-- | tools/AddonsScraper.py | 169 | ||||
-rw-r--r-- | tools/buildbinaries | 46 | ||||
-rw-r--r-- | tools/gnupload | 54 |
3 files changed, 269 insertions, 0 deletions
diff --git a/tools/AddonsScraper.py b/tools/AddonsScraper.py new file mode 100644 index 0000000..3813252 --- /dev/null +++ b/tools/AddonsScraper.py @@ -0,0 +1,169 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2014 Rubén Rodríguez <ruben@gnu.org> +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +# +# +# This package parses https://addons.mozilla.org and generates a database +# listing the addons that are under a valid license. addons.mozilla.org is +# itself licensed as "CC Attribution Share-Alike v3.0 or any later version." +# Note that screenshots get licensed under the same license as the program. 
+# +# To run, install mysql-server, python-mysqldb and python-beautifulsoup +# + + +import re +import sys +import urllib2 +import BeautifulSoup + +server="https://addons.mozilla.org" +parsepages=2 #Number of pages per category to parse +dbuser="root" +dbpass="" + +validlicenses = ['http://www.gnu.org/licenses/gpl-3.0.html', +'http://www.gnu.org/licenses/gpl-2.0.html', +'http://www.gnu.org/licenses/lgpl-3.0.html', +'http://www.gnu.org/licenses/lgpl-2.1.html', +'http://www.opensource.org/licenses/bsd-license.php', +'http://www.opensource.org/licenses/mit-license.php', +'http://www.mozilla.org/MPL/MPL-1.1.html', +'http://www.mozilla.org/MPL/2.0/'] + +categories = ['alerts-updates', 'appearance', 'bookmarks', 'download-management', 'feeds-news-blogging', 'games-entertainment', 'language-support', 'photos-music-videos', 'privacy-security', 'search-tools', 'shopping', 'social-communication', 'tabs', 'web-development', 'other'] + +def normalink(string): + return re.sub('\?.*', '', string) + +def parselist(url): + print "PARSING LIST: " + url + l = [] + request = urllib2.Request(url) + response = urllib2.urlopen(request) + soup = BeautifulSoup.BeautifulSoup(response) + for infodiv in soup.findAll('div',{'class':'info'}): + for h3 in infodiv.findAll('h3'): + for link in h3.findAll('a'): + l.append(re.sub('\?.*', '', link['href'])) + return l + + +def parsepage(url, category): + request = urllib2.Request(url) + response = urllib2.urlopen(request) + soup = BeautifulSoup.BeautifulSoup(response) + try: + licenseli = soup.findAll('li',{'class':'source-license'})[0] + license = licenseli.findAll('a')[0]['href'] + if license not in validlicenses: + if license[0] == "h": + print "INVALID LICENSE: " + license + return 0 + except: + return 0 + name = re.sub('/$','', normalink(url)) + name = re.sub('.*/','', name) + prettyname = soup.findAll(attrs={"property":"og:title"})[0]['content'] + description = soup.findAll(attrs={"property":"og:description"})[0]['content'] + rating = 
soup.findAll(attrs={"itemprop":"ratingValue"})[0]['content'] + popularity = soup.findAll(attrs={"itemprop":"interactionCount"})[0]['content'] + popularity = re.sub('UserDownloads:', '', popularity) + htmldescription = soup.findAll('div',{'id':'addon-description'})[0] + icon = normalink(soup.findAll(attrs={"property":"og:image"})[0]['content']) + screenshots = [] + try: + previewdiv = soup.findAll('ul',{'id':'preview'})[0] + for a in previewdiv.findAll('a'): + screenshots.append(normalink(a['href'])) + except: + pass + version = soup.findAll('span',{'class':'version-number'})[0].text + addondiv = soup.findAll('div',{'id':'addon'})[0] + addonp = addondiv.findAll('p',{'class':'install-button'})[0] + button = addonp.findAll('a')[0] + downloadlink = server + normalink(button['href']) + try: + homelink = soup.findAll('a',{'class':'home'})[1]['href'] + homelink = re.sub('.*//','http://',homelink) + except: + homelink = "" + try: + supportlink = soup.findAll('a',{'class':'support'})[0]['href'] + supportlink = re.sub('.*//','http://',supportlink) + except: + supportlink = "" + + htmldescription = unicode(htmldescription) + description = unicode(description) + prettyname = unicode(prettyname) + description = re.sub('\'', '\\\'', description) + htmldescription = re.sub('\'', '\\\'', htmldescription) + prettyname = re.sub('\'', '\\\'', prettyname) + screenshots = unicode(screenshots) + + sql = u'INSERT INTO addons.addons (`name`, `prettyname`, `description`, `htmldescription`, `icon`, `screenshots`, `version`, `rating`, `popularity`, `downloadlink`, `homelink`, `supportlink`, `retrievedlink`, `license`, `category`) VALUES ("' +name+ '", \'' +prettyname+ '\', \'' +description+ '\', \'' +htmldescription+ '\', "' +icon+ '", "' +screenshots+ '", "' +version+ '", "' +rating+ '", "' +popularity+ '", "' +downloadlink+ '", "' +homelink+ '", "' +supportlink+ '", "' +url+ '" , "' +license+ '", "' +category+ '");' + try: + cursor = db.cursor() + cursor.execute(sql) + cursor.close() + 
db.commit() + except: + print 'Failed to insert "' + name + '", query: "' + sql + else: + print "Added " + name + " " + url + +import MySQLdb +db = MySQLdb.connect(host="localhost",user=dbuser,passwd=dbpass, charset="utf8", use_unicode=True) +cursor = db.cursor() +sql = """DROP DATABASE IF EXISTS addons; +CREATE DATABASE addons CHARACTER SET utf8 COLLATE utf8_general_ci; +USE addons; +CREATE TABLE addons( +id INT PRIMARY KEY AUTO_INCREMENT, +name VARCHAR(50), +prettyname VARCHAR(50), +description TEXT, +htmldescription TEXT, +icon VARCHAR(255), +screenshots TEXT, +version VARCHAR(20), +rating VARCHAR(10), +popularity INT, +downloadlink VARCHAR(255), +homelink VARCHAR(255), +supportlink VARCHAR(255), +retrievedlink VARCHAR(255), +license VARCHAR(255), +category VARCHAR(20) +); +""" +cursor.execute(sql) +cursor.close() +db.commit() + +for category in categories: + links=[] + for page in range(1,1+parsepages): + links = links + parselist(server + "/en-US/firefox/extensions/" + category + "/?sort=popular&page="+str(page)) + for link in links: + parsepage(server+link, category) + +#tests +#parsepage("https://addons.mozilla.org/en-US/firefox/addon/twoo/", "test") +#parsepage("https://addons.mozilla.org/en-US/firefox/addon/what-about/", "test") diff --git a/tools/buildbinaries b/tools/buildbinaries new file mode 100644 index 0000000..0301cd7 --- /dev/null +++ b/tools/buildbinaries @@ -0,0 +1,46 @@ +#!/bin/bash + +set -e + +VERSION=31.2.0 +JAILDIR="/home/systems/gnuzilla/jails/toutatis" +ARCHS="i386 amd64" + +rm binaries -rf +mkdir binaries + +for ARCH in $ARCHS; do + [ -f $JAILDIR-$ARCH/proc/cpuinfo ] || mount -t proc none $JAILDIR-$ARCH/proc + mount -t tmpfs -o size=20G none $JAILDIR-$ARCH/root/ + cp icecat-$VERSION -a $JAILDIR-$ARCH/root/ + + cat << EOF > $JAILDIR-$ARCH/root/buildscript +set -e +set -x + +export LANG=C +cd /root/icecat-$VERSION +mkdir temp +cd temp + +../configure --with-l10n-base=\$PWD/../l10n --enable-official-branding --disable-crashreporter 
#!/bin/bash
#
# Build IceCat release tarballs and language packs for each architecture
# inside pre-made chroot jails ($JAILDIR-$ARCH). Results are collected
# into ./binaries. Must run as root (mount/chroot).

set -e

VERSION=31.2.0
JAILDIR="/home/systems/gnuzilla/jails/toutatis"
ARCHS="i386 amd64"

rm -rf binaries
mkdir binaries

for ARCH in $ARCHS; do
    # Mount /proc in the jail unless a previous run left it mounted.
    [ -f "$JAILDIR-$ARCH/proc/cpuinfo" ] || mount -t proc none "$JAILDIR-$ARCH/proc"
    # Build in a tmpfs for speed; torn down at the end of each iteration.
    mount -t tmpfs -o size=20G none "$JAILDIR-$ARCH/root/"
    cp -a "icecat-$VERSION" "$JAILDIR-$ARCH/root/"

    # Build script executed inside the chroot. \$ escapes keep those
    # expansions for the inner shell; $VERSION expands here.
    cat << EOF > "$JAILDIR-$ARCH/root/buildscript"
set -e
set -x

export LANG=C
cd /root/icecat-$VERSION
mkdir temp
cd temp

../configure --with-l10n-base=\$PWD/../l10n --enable-official-branding --disable-crashreporter --disable-gnomevfs --enable-gio --disable-debug --enable-gstreamer=0.10 --with-distribution-id=org.gnu --disable-updater
make -j8

cd browser/installer
make

cd ../locales
for locale in \$(ls ../../../l10n/ -1); do
make langpack-\$locale LOCALE_MERGEDIR=.
done
EOF

    chroot "$JAILDIR-$ARCH" /bin/bash /root/buildscript
    cp "$JAILDIR-$ARCH"/root/icecat-"$VERSION"/temp/dist/icecat*.tar.bz2 binaries
    # BUGFIX: a bare '[ ... ] && cmd' returns non-zero when the test is false,
    # which under 'set -e' aborted the script for every non-i386 arch before
    # the umounts below could run. Langpacks are arch-independent, so they are
    # only collected once (from the i386 build).
    if [ "$ARCH" = i386 ]; then
        cp -a "$JAILDIR-$ARCH"/root/icecat-"$VERSION"/temp/dist/linux-*/xpi/ binaries/langpacks
    fi

    umount "$JAILDIR-$ARCH/root/" || true
    umount "$JAILDIR-$ARCH/proc" || true

done
#!/bin/bash
#
# Sign every file in ./binaries (and ./binaries/langpacks) with the release
# key, generate the GNU FTP upload directives, and push everything to
# ftp-upload.gnu.org following the automated-upload protocol.

if [ ! -d binaries ]; then
    echo Binaries dir not found, exiting
    exit 1
fi

# Remove signatures/directives left over from a previous (possibly failed) run.
find binaries -type f | grep -e 'asc$' -e 'sig$' | xargs -r rm

set -e

VERSION=31.2.0
newline="
"
mputs=""

# sign_and_queue DIR FILE REMOTEDIR
#   Detach-sign DIR/FILE, create and clearsign its upload directive, and
#   append the corresponding FTP 'put' commands to $mputs.
sign_and_queue() {
    local dir="$1" file="$2" remotedir="$3"

    gpg -b --default-key D7E04784 "$dir/$file"
    echo "version: 1.2
filename: $file
directory: $remotedir
" > "$dir/$file.directive"
    gpg --default-key D7E04784 --clearsign "$dir/$file.directive"
    rm "$dir/$file.directive"
    mputs="${mputs}put $dir/$file $file $newline"
    mputs="${mputs}put $dir/$file.sig $file.sig $newline"
    mputs="${mputs}put $dir/$file.directive.asc $file.directive.asc $newline"
}

# The two loops below were previously duplicated inline; release tarballs go
# to gnuzilla/$VERSION, langpacks to gnuzilla/$VERSION/langpacks.
for file in $(ls binaries/ | grep icecat); do
    sign_and_queue binaries "$file" "gnuzilla/$VERSION"
done

for file in $(ls binaries/langpacks); do
    sign_and_queue binaries/langpacks "$file" "gnuzilla/$VERSION/langpacks"
done

ftp -v -p -n -i << EOF | tee log
open ftp-upload.gnu.org
user anonymous anonymous
cd /incoming/alpha
$mputs
quit
EOF