User:TalBot/spot-double-redirects.py

#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Detect double or otherwise faulty soft or hard redirects
#
# Copyright (C) 2007, GrafZahl (en.wikisource.org user)
#
# Licence: GPLv2
#
# run with standard args "-log -putthrottle:xx"
#
# Further arguments:
#
#	-cat:xxx
#		Specifies the category for which soft redirects should be
#		checked, for example: -cat:'Soft redirects/August 2006'
#		(replace the single quotes with whatever is appropriate for
#		your shell)
#

import catlib, sys, wikipedia

# Throttle page fetches (the argument is presumably a delay in seconds --
# TODO confirm against wikipedia.py)

wikipedia.get_throttle.setDelay(5)

# Parse the command line. handleArgs() is assumed to process the standard
# arguments itself and to hand back the remaining ones -- TODO confirm
# against wikipedia.py. Of those, only -cat:xxx is interpreted here; if it
# is given more than once, the last occurrence wins.

args = wikipedia.handleArgs()

month = False

for arg in args:
	if arg.startswith(u'-cat:'):
		month = arg[len(u'-cat:'):]

# A missing or empty category name is fatal

if not month:
	wikipedia.output(u'(FFF) No category given (-cat:xxx)')
	sys.exit(1)

# Set up the site and the category whose members are to be examined

site = wikipedia.getSite()
cattitle = u'Category:%s' % month
cat = catlib.Category(site, cattitle)
articles = cat.articles()

# First level: every member of the category is expected to be an existing
# page that is not a hard redirect, that contains the soft redirect marker
# text, and that links to exactly one page. Members failing one of these
# checks are reported; the link targets of the remaining members are
# collected in second_level for the second level check.

wikipedia.output(u'(III) Starting first level check')

second_level = set()

for member in articles:
	try:
		if not member.exists():
			wikipedia.output(u'(EEE) [[%s]] does not exist' % member.title())
		elif member.isRedirectPage():
			wikipedia.output(u'(EEE) [[%s]] is a hard redirect' % member.title())
		elif u'oft redirect' not in member.get():
			# The search text omits the first letter, so both
			# "Soft redirect" and "soft redirect" match.
			wikipedia.output(u'(EEE) [[%s]] may not be a soft redirect' % member.title())
		else:
			targets = member.linkedPages()
			if len(targets) == 1:
				# The section part of the link is dropped on
				# purpose: wikipedia.py does not always
				# recognise correct section anchors (see bug
				# #2928239). Ignoring sections is acceptable
				# because the worst that can happen is that
				# the user gets to the correct page without
				# scrolling to the correct section.
				second_level.add(wikipedia.Page(site, targets[0].sectionFreeTitle()))
			else:
				wikipedia.output(u'(EEE) [[%s]] does not have an unambiguous reference to a page on this wiki' % member.title())
	except wikipedia.Error:
		# Report the failure and carry on with the next member
		wikipedia.output(u'(EEE) Exception processing [[%s]]' % member.title())

# Second level: examine the pages the soft redirects point to. A missing
# target means the soft redirects pointing to it are dangling; a target
# that is itself a hard redirect, or that contains the soft redirect
# marker text, means there is a double redirect.

wikipedia.output(u'(III) Starting second level check')

for target in second_level:
	try:
		if not target.exists():
			wikipedia.output(u'(EEE) [[%s]] does not exist; soft redirects pointing to there are dangling' % target.title())
		elif target.isRedirectPage() or u'oft redirect' in target.get():
			# get() is only reached when the target is not a
			# hard redirect, thanks to short-circuit evaluation
			wikipedia.output(u'(EEE) There exist double redirects pointing to [[%s]]' % target.title())
	except wikipedia.Error:
		# Report the failure and carry on with the next target
		wikipedia.output(u'(EEE) Exception processing [[%s]]' % target.title())