Szerkesztő:BinBot/huwiki/category.py

Explanation / Magyarázat

"""Category-related stuff for Hungarian Wikipedia."""
#
# (C) Bináris, 2023
#
# Distributed under the terms of the MIT license.

import re
import pywikibot

class CatProperties:
    """Category-related predicates for one Hungarian Wikipedia category.

    The predicates live in a class because they are easier to export
    that way. Each instance wraps a single category object and answers
    questions about it based on its title and its supercategories.
    """

    # Title (without namespace) of the supercategory that marks
    # "inhabitants of a settlement" categories.
    _CITIZEN_SUPERCAT = re.compile(r'Személyek település szerint \(.*?\)')

    # Title endings that mark a secondary category. They are kept as
    # separate strings on purpose: the former '|'-joined string lost a
    # separator between ' család' and 'díjasok', so neither ending matched.
    _SECONDARY_ENDINGS = (
        ' személyek',      # ... persons (born, deceased, living etc.)
        '-tagok',          # members of
        ' tagjai',         # members of
        ' nők',            # some kind of women (with space!)
        ' magyarok',
        ' család',         # member of a family
        'díjasok',         # awarded with
        'érmesek',         # awarded with
        'koszorúsok',      # awarded with
        'díszpolgárai',
        'kitüntetettjei',  # awarded with
        'birtokosai',      # awarded with
        'tulajdonosai',    # awarded with
        'diákok',
        'diákjai',
        'tanárai',
        'végzettjei',
        'túlélői',
    )

    # The annotation is quoted so that it is not evaluated when the class
    # is defined.
    def __init__(self, cat: 'pywikibot.Category') -> None:
        """Store the category to be examined.

        :param cat: the category whose properties are queried
        """
        self.cat = cat

    def is_cityzencat(self) -> bool:
        """Is it like "Miskolciak" (inhabitants of)?

        Only the direct supercategories are examined, so this may
        produce false negatives.

        :return: True if a direct supercategory is titled
            "Személyek település szerint (...)"
        """
        return any(
            self._CITIZEN_SUPERCAT.fullmatch(supercat.title(with_ns=False))
            for supercat in self.cat.categories()
        )

    def is_secondary_category(self) -> bool:
        """Is it a secondary category?

        Secondary categories, such as birth, death, city, awards etc.
        are useful, but not enough to say the article is properly
        categorized. The current list is connected to biographies and
        is only a heuristic based on the category title:

        - Élő személyek (living persons)
        - ... született személyek (births in year or city)
        - ... elhunyt személyek (deaths in year or city or disease)
        - ... személyek (some persons, e.g. changed their names)
        - ...-tagok, ... tagjai (members of)
        - ...díjasok, érmesek, koszorúsok (awarded with)
        - ...kitüntetettjei, birtokosai, tulajdonosai (awarded with)
        - ... család (member of a family)
        - Nők (women) or "... nők" (some kind of women, with space),
          but 'Magyar írónők' is OK!
        - titles containing " származású " (of ... origin)
        - the remaining endings listed in _SECONDARY_ENDINGS

        :return: True if the title matches any of the above
        """
        title = self.cat.title(with_ns=False)
        return (
            title == 'Nők'
            or ' származású ' in title
            or title.endswith(self._SECONDARY_ENDINGS)
        )

    def is_not_real_categorization(self) -> bool:
        """This category is not enough to properly categorize a page.

        A page is not really categorized if all of its categories are
        hidden categories, inhabitants-of categories (see
        is_cityzencat()) or secondary categories (see
        is_secondary_category()). Such an article needs attention.

        The hidden check comes first; is_cityzencat() has to iterate the
        supercategories, so it is only evaluated for non-hidden
        categories.
        """
        return (
            self.cat.isHiddenCategory()
            or self.is_cityzencat()
            or self.is_secondary_category()
        )