# User:EiBot/spot-double-redirects.py

#! /usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Detect double or otherwise faulty soft or hard redirects
#
# Copyright (C) 2007, GrafZahl (en.wikisource.org user)
#           (C) 2020, Einstein95 (en.wikisource.org user)
#
# Licence: GPLv2
#
# run with standard args "-log -putthrottle:xx"
#
# Further arguments:
#
#   -cat:xxx
#       Specifies the category for which soft redirects should be
#       checked, for example: -cat:'Soft redirects/August 2006'
#       (replace the single quotes with whatever is appropriate for
#       your shell)
#

import pywikibot

# pywikibot.get_throttle.setDelay(5)

# Handle args
#
# pywikibot.handle_args() is given the command line first; the list it
# returns is then scanned for the script-specific -cat:xxx option.

args = pywikibot.handle_args()

# Category title (without the "Category:" prefix) taken from -cat:xxx.
# Stays None when the option was not supplied.
month = None

for arg in args:
    if arg.startswith('-cat:'):
        # If -cat: is given more than once, the last occurrence wins.
        month = arg[len('-cat:'):]

if not month:
    # Also reached when the option is present but empty ("-cat:").
    pywikibot.output('(FFF) No category given (-cat:xxx)')
    # The bare exit() helper is injected by the site module and is not
    # guaranteed to be available (e.g. "python -S"); raising SystemExit
    # needs no import and terminates with the same exit status.
    raise SystemExit(1)

# Set up the wiki objects the checks operate on

# Site object created with pywikibot's defaults (no arguments passed).
site = pywikibot.Site()

# Full page title of the category named on the command line.
cattitle = 'Category:{}'.format(month)

# The category itself and the listing of the pages it contains.
cat = pywikibot.Category(site, cattitle)
articles = cat.articles()

# Examine first level soft redirects
#
# Each member of the category has to be an existing page that is not a hard
# redirect, whose text contains the soft redirect marker, and which links to
# exactly one page in namespace 0.  That single link target is collected for
# the second level check; every other case is reported and skipped.

pywikibot.output('(III) Starting first level check')

# Targets of the soft redirects found below, examined in the second pass.
second_level = set()

for page in articles:
    title = page.title()
    try:
        if not page.exists():
            pywikibot.output('(EEE) [[%s]] does not exist' % title)
            continue
        if page.isRedirectPage():
            pywikibot.output('(EEE) [[%s]] is a hard redirect' % title)
            continue
        # The marker omits its first letter, presumably so that both
        # "Soft redirect" and "soft redirect" are matched.
        if 'oft redirect' not in page.get():
            pywikibot.output('(EEE) [[%s]] may not be a soft redirect' % title)
            continue
        # Only links into namespace 0 count as candidate targets.
        lp = [p for p in page.linkedPages() if p.namespace().id == 0]
        if len(lp) != 1:
            pywikibot.output('(EEE) [[%s]] does not have an unambiguous reference to a page on this wiki' % title)
            continue
        # Drop any "#section" part so the target is the page itself.
        second_level.add(pywikibot.Page(site, lp[0].title(with_section=False)))
    except pywikibot.exceptions.Error:
        # Use the exception class from pywikibot.exceptions rather than the
        # top-level pywikibot.Error alias, which newer Pywikibot releases
        # are believed to have deprecated.  The name is only resolved once
        # an exception is actually raised, so a missing alias would
        # otherwise abort the run here.
        pywikibot.output('(EEE) Exception processing [[%s]]' % title)

# Second level
#
# Every target collected during the first level check must exist and must
# not itself be a redirect (hard or soft); otherwise the soft redirects
# pointing to it are dangling or double redirects.

pywikibot.output('(III) Starting second level check')

for page in second_level:
    title = page.title()
    try:
        if not page.exists():
            pywikibot.output('(EEE) [[%s]] does not exist; soft redirects pointing to there are dangling' % title)
            continue
        # The text is only fetched when the page is not a hard redirect.
        if page.isRedirectPage() or 'oft redirect' in page.get():
            pywikibot.output('(EEE) There exist double redirects pointing to [[%s]]' % title)
    except pywikibot.exceptions.Error:
        # Use the exception class from pywikibot.exceptions rather than the
        # top-level pywikibot.Error alias, which newer Pywikibot releases
        # are believed to have deprecated.  The name is only resolved once
        # an exception is actually raised, so a missing alias would
        # otherwise abort the run here.
        pywikibot.output('(EEE) Exception processing [[%s]]' % title)