Gebruiker:Valhallasw/imagesearch
Uiterlijk
Huidige versie
#################################
# INTERWIKI IMAGE HARVESTER
# Uses interwiki links on a page to find images used on other wikis to find useable images for subjects
#
# Uses a wiki format to output, but outputs to stdout.
#
# (C)2006 by [[nl:Gebruiker:Valhallasw]] and [[nl:Gebruiker:Gerbennn]]
# Licenced under the MIT licence
#################################
import wikipedia,config,re,sys

try:
    # Accumulators: every image found, then the split into images that also
    # exist on commons and images that do not.
    harvested = []
    on_commons = []
    not_on_commons = []

    # The page to start from is asked for interactively.
    subject = wikipedia.Page(wikipedia.getSite(), wikipedia.input("Please enter wiki page:"))

    # ------------------------------------------------------------------
    # Step 1: collect the image links of every interwiki-linked page.
    # ------------------------------------------------------------------
    for linked in subject.interwiki():
        # Flush on every iteration so piped/redirected output stays current.
        sys.stdout.flush()
        try:
            harvested.extend(linked.imagelinks())
        except wikipedia.IsRedirectPage:
            # The exception's first argument is used as the redirect target title.
            redirect = sys.exc_info()[1]
            wikipedia.output(u'DBG: %s raises IsRedirectPage to %s' % (linked.aslink(), redirect.args[0]))
            try:
                # Retry once against the redirect target on the same site.
                linked = wikipedia.Page(linked.site(), redirect.args[0])
                harvested.extend(linked.imagelinks())
            except (wikipedia.NoPage, wikipedia.PageNotFound, wikipedia.IsRedirectPage):
                # Non-fatal: report it and move on to the next interwiki link.
                wikipedia.output(u'DBG: %s raised non-fatal %s exception' % (linked.aslink(), sys.exc_info()[0]))
        except (wikipedia.NoPage, wikipedia.PageNotFound):
            # Non-fatal: report it and move on to the next interwiki link.
            wikipedia.output(u'DBG: %s raised non-fatal %s exception' % (linked.aslink(), sys.exc_info()[0]))

    # ------------------------------------------------------------------
    # Step 2: split the images into commons and non-commons ones.
    # Only the commons list is de-duplicated.
    # ------------------------------------------------------------------
    for found in harvested:
        sys.stdout.flush()
        # Look for a page with the same image title on commons.
        commons_site = wikipedia.getSite('commons','commons')
        candidate = wikipedia.Page(commons_site, 'image:'+found.titleWithoutNamespace())
        if not candidate.exists():
            wikipedia.output(u'Found image: '+found.aslink())
            not_on_commons.append(found)
            continue
        wikipedia.output(u'Found commons image: '+candidate.aslink())
        if candidate not in on_commons:
            on_commons.append(candidate)

    # ------------------------------------------------------------------
    # Step 3: write the report as wiki markup.
    # ------------------------------------------------------------------
    # Heading carrying the title of the starting page.
    wikipedia.output(u'==%s==' % subject.title())

    # Table of commons images: a 200px image link plus the page text of the
    # image page, with newlines and <nowiki> tags stripped out.
    wikipedia.output(u'===Commons===\n{| class="wikitable"')
    for entry in on_commons:
        entry_title = entry.title()
        description = re.sub('(\n|</?nowiki>)','',entry.get())
        wikipedia.output(u'|-\n|[[%s|200px]] || %s' % (entry_title, description))

    # Images without a commons counterpart become a bulleted list of
    # language-prefixed links.
    wikipedia.output(u'|}\n===Non-commons===')
    for entry in not_on_commons:
        wikipedia.output(u'* [[:%s:%s]]' % (entry.site().lang, entry.title()))
finally:
    # Runs whether or not the harvest succeeded.
    wikipedia.stopme()
Oude versie(s)
[bewerken | brontekst bewerken]Pythonscriptje om een lijst met images te genereren.
dev-uitvoering, op het moment alleen direct in python te gebruiken door te plakken ;)
# Development snippet: for every two-character-prefixed link ([[xx:Title]])
# found in the text of one page, request the image links of the linked page.
import wikipedia,config,re

page = wikipedia.Page(wikipedia.getSite(),"Kwantumcomputer")
contents = page.get()

# One pattern with two groups yields (language code, title) pairs directly.
# The title group is non-greedy: a greedy '.*' would run on to the LAST ']]'
# on the line, so several links on one line would collapse into a single
# bogus match.
links = re.findall(r'\[\[(..):(.*?)\]\]', contents)
for lang, pagename in links:
    site = wikipedia.getSite(lang)
    temppage = wikipedia.Page(site, pagename)
    # The result is not stored. The accompanying note says this snippet is
    # meant to be pasted into an interactive interpreter, which echoes it.
    temppage.imagelinks()
uiteraard kwantumcomputer door een pagina naar keuze vervangen ;)
Poging tot verbetering door Gerbennn
# Variant that follows the page's own interwiki links and requests the image
# links of every linked page. The results are not stored.
import wikipedia,config,re

try:
    start_page = wikipedia.Page(wikipedia.getSite(),"Computer")
    for other_language_page in start_page.interwiki():
        other_language_page.imagelinks()
finally:
    # Runs whether or not the loop above succeeded.
    wikipedia.stopme()
meer van valhalla:
/* Interwiki image harvester (C) 2006 by [[nl:Gebruiker:Valhallasw]] and [[nl:Gebruiker:Gerbennn]] Licenced under the MIT License */ import wikipedia,config,re,sys try: images = [] page = wikipedia.Page(wikipedia.getSite(),"Computer") links = page.interwiki() for link in links: try: images = images + link.imagelinks() except wikipedia.IsRedirectPage,target: wikipedia.output(u'DBG: %s raises IsRedirectPage to %s' % (link.aslink(), target.args[0])) try: link = wikipedia.Page(link.site(), target.args[0]) except: wikipedia.output(u'DBG: %s raises %s' % (link.aslink(), sys.exc_info()[0])) except: wikipedia.output(u'DBG: %s raises %s' % (link.aslink(), sys.exc_info()[0])) out = u'{| class="wikitable" \n|'+page.aslink()+u'\n|' for image in images: temppage = wikipedia.Page(wikipedia.getSite('commons','commons'),'image:'+image.titleWithoutNamespace()) if temppage.exists(): wikipedia.output(u'Found commons image: '+temppage.aslink()) out = out + '<b>[[:%s]]</b><br />' % temppage.title() else: wikipedia.output(u'Found image: '+image.aslink()) out = out + '[[:%s:%s]]<br />' % (image.site().lang, image.title()) out = out + u'\n|}' wikipedia.output(out) finally: wikipedia.stopme()
geeft: