#!/usr/bin/python
""" External Links (Namespace Aware) for Wikipedia """

import urllib
from xml.dom.minidom import parseString
from cgi import FieldStorage
import traceback


def main():
    try:
        form = FieldStorage()
        el_pattern = form.getfirst('el', '')
        offset = int( form.getfirst('offset', 0) )
        # keep only digits and '|' so the value is safe to pass straight to the API
        namespace = ''.join(filter( lambda x: x in '0123456789|',
                                    form.getfirst('ns', '0').replace(',', '|') ))
        makepage( el_pattern, offset, namespace )
    except:
        # report any failure as plain text so the CGI response stays readable
        print 'Content-type: text/plain'
        print ''
        print traceback.format_exc()


def makepage( el_pattern, offset, namespace ):
    nextpage = None
    # search form; the field names match the CGI parameters read in main()
    html = u'''
<h2>Search</h2>
<form method="get" action="">
<p>Find the following URL in the article namespace:
<input type="text" name="el" size="50" />
<input type="submit" value="Search" /></p>
<h3>Advanced</h3>
<p>Namespace ID list:
<input type="text" name="ns" size="20" value="0" /></p>
</form>
'''
    if el_pattern:
        # when a pattern was given, show the matching rows instead of the form
        ( rows, nextpage ) = html_table_output( el_pattern, offset, namespace )
        html = u'<table>%s</table>' % rows
    print u'''Content-type: text/html

<html>
<head><title>Wikipedia API: External Links ( Namespace Aware )</title></head>
<body>
<h1>External Links (Namespace Aware) for Wikipedia [BETA]</h1>
<p>Check usage of external links in Wikipedia using the API.</p>
%s
<p>
%s
</p>
</body>
</html>
''' % ( html,
        nextpage and u'<a href="?el=%s&amp;offset=%d">Next page</a>' % ( h( el_pattern ), nextpage, )
                 or u'End of results', )
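# Example of the CGI interface handled by main() (illustrative values): a request
# query string such as
#   ?el=example.com/wiki&ns=0,2&offset=50
# is parsed into the call makepage('example.com/wiki', 50, '0|2') -- commas in the
# 'ns' value become '|' and any character other than a digit or '|' is dropped.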
def h( x ):
    # escape HTML metacharacters and force pure-ASCII output
    return x.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').encode('ascii', 'xmlcharrefreplace')


def urlencode(x):
    # percent-encode a page title for use in a wiki URL; spaces become underscores
    def encchar(y):
        if y == ' ':
            return '_'
        if y in 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_':
            return y
        return '%%%02x' % ord(y)
    if isinstance(x, unicode):
        x = x.encode('utf-8')  # escape one UTF-8 byte at a time
    return ''.join( map( encchar, x ) )


def html_table_output( el_pattern, offset, namespace ):
    xml = get_xml_data( el_pattern, offset, namespace )
    html_items = []
    offset = None
    for item in xml.getElementsByTagName( 'eu' ):
        pagename = item.getAttribute('title')
        elurl = item.getAttribute('url')
        # one row per usage: edit link, link search, the article, and the matched URL
        html_items.append( u'''<tr>
 <td><a href="http://en.wikipedia.org/w/index.php?title=%s&amp;action=edit">edit</a></td>
 <td><a href="http://en.wikipedia.org/w/index.php?title=Special:LinkSearch&amp;target=%s">links</a></td>
 <td><a href="http://en.wikipedia.org/wiki/%s" title="%s">%s</a></td>
 <td><a href="%s">%s</a></td>
</tr>''' % ( urlencode( pagename ), h( elurl ),
             urlencode( pagename ), h( pagename ), h( pagename ),
             h( elurl ), h( snipurl( elurl ) ), ) )
    for item in xml.getElementsByTagName( 'exturlusage' ):
        # the query-continue element carries the offset of the next page of results
        if item.hasAttribute( 'euoffset' ):
            offset = int( item.getAttribute( 'euoffset' ) )
    return ( ''.join( html_items ), offset )


def snipurl(fullurl):
    # shorten a URL for display: keep the host, truncate a long path
    ( scheme, url ) = fullurl.split('://', 1)
    ( base, path ) = ( url.split('/', 1) + [''] )[:2]
    if len(path) > 18:
        path = '...%s' % path[-15:]
    if scheme == 'http':
        return '%s/%s' % ( base, path )
    return '%s://%s/%s' % ( scheme, base, path )


def get_xml_data( el_pattern, offset, namespace ):
    raw_data = urllib.urlopen(
        'http://en.wikipedia.org/w/api.php?action=query&list=exturlusage'
        '&euquery=%s&eunamespace=%s&eulimit=50&euoffset=%d&format=xml' % (
            urlencode( el_pattern ), namespace, offset, ) ).read()
    return parseString( raw_data )


if __name__ == '__main__':
    main()
else:
    print __doc__
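# Sketch of the exchange with the MediaWiki API (abbreviated, illustrative values):
# get_xml_data('example.com', 0, '0') fetches
#   http://en.wikipedia.org/w/api.php?action=query&list=exturlusage
#       &euquery=example%2ecom&eunamespace=0&eulimit=50&euoffset=0&format=xml
# (urlencode() percent-escapes the dot), and html_table_output() reads the <eu>
# elements and the continuation offset from a response shaped roughly like:
#   <api>
#     <query>
#       <exturlusage>
#         <eu ns="0" title="Example article" url="http://example.com/page" />
#       </exturlusage>
#     </query>
#     <query-continue>
#       <exturlusage euoffset="50" />
#     </query-continue>
#   </api>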