Szerkesztő:BinBot/huwiki/wikidata.py

Explanation / Magyarázat

Részletes leírás és példák a modul használatára

"""Wikidata-related stuff for Hungarian Wikipedia."""

#
# (C) Bináris, 2024
#
# Distributed under the terms of the MIT license.

from typing import Union
import pywikibot
from pywikibot.backports import Iterable

# Module-level connection objects shared by everything below.
# Site() is called without arguments, so which wiki it represents is decided
# by pywikibot itself (presumably the user's configuration; this module
# expects it to be Hungarian Wikipedia -- TODO confirm).
site = pywikibot.Site()
# The data repository attached to that site; used for every ItemPage and
# PropertyPage created in this module.
repo = site.data_repository()

# https://doc.wikimedia.org/pywikibot/master/api_ref/pywikibot.page.html#page.Claim
# https://www.wikidata.org/wiki/Help:Ranking

class ItemPlus(pywikibot.ItemPage):
    """A Wikidata item page with convenience methods for huwiki.

    May be instantiated with either an ItemPage or a 'Qxxxx' string.
    The item content is fetched in the constructor and stored in
    ``self.data``.
    """

    # Properties whose presence counts as authority control data, i.e. the
    # IDs used in {{Nemzetközi katalógusok}}:
    # https://hu.wikipedia.org/wiki/Sablon:Nemzetk%C3%B6zi_katal%C3%B3gusok
    _AUTH_CTRL_PROPS = (
        'P213', 'P214', 'P227', 'P244', 'P254', 'P268', 'P269',
        'P396', 'P434', 'P496', 'P549', 'P651', 'P691', 'P906',
        'P950', 'P951', 'P1015', 'P1157', 'P2492', 'P3133', 'P3973',
        'P6796', 'P10832',
    )

    def __init__(self, item: Union[pywikibot.ItemPage, str]) -> None:
        """Create the item and load its content.

        Call it for an existing page only; existence is not checked.

        :param item: an ItemPage, or a Q number given as a string such as
            'Q42'
        :raises ValueError: if item is neither an ItemPage nor a string of
            the form 'Q' followed by digits
        """
        if isinstance(item, pywikibot.ItemPage):
            title = item.title()
        elif isinstance(item, str) \
                and item.startswith('Q') and item[1:].isdigit():
            title = item
        else:
            raise ValueError(
                f'{item} is neither a Wikidata item page nor Q number.')
        super().__init__(repo, title)
        # Fetched once here; the properties below read from this result.
        self.data = self.get(get_redirect=True)

    def _getstr(self,
                strings: pywikibot.page._collections.LanguageDict) -> str:
        """Return the Hungarian text, else the English one, else ''."""
        return strings.get('hu', strings.get('en', ''))

    @property
    def label(self) -> str:
        """Label of the item in Hungarian or English ('' if neither)."""
        return self._getstr(self.data['labels'])

    @property
    def description(self) -> str:
        """Description of the item in Hungarian or English ('' if neither)."""
        return self._getstr(self.data['descriptions'])

    @property
    def has_magyar(self) -> bool:
        """Tell if the item has a sitelink to huwiki."""
        return 'huwiki' in list(self.sitelinks)

    @property
    def hupage(self) -> Union[pywikibot.Category, pywikibot.Page, None]:
        """Return the huwiki page belonging to the item.

        If the item is an instance of (P31) Q4167836, i.e. it represents a
        category, the returned type is pywikibot.Category. Otherwise it is
        pywikibot.Page. If the item has no huwiki sitelink, None is returned.
        """
        if not self.has_magyar:
            return None
        title = self.getSitelink('huwiki')
        if self.property_has_value('P31', 'Q4167836'):
            return pywikibot.Category(site, title)
        return pywikibot.Page(site, title)

    @property
    def is_magyar(self) -> bool:
        """Try to say if the person is likely to be Hungarian.

        1. Look for citizenship (P27) in Wikidata. If the item has any P27
           claim, the answer depends only on whether one of them is
           'Magyarország' (Q28) or 'Magyar Királyság' (Q171150).
        2. Only if the item has no P27 at all, look for the word 'magyar'
           or 'hungarian' in the description.
        """
        # property_has_value() returns None when P27 is missing, so the
        # ``or`` chain is None only if there is no citizenship claim.
        hun = self.property_has_value('P27', 'Q28') \
            or self.property_has_value('P27', 'Q171150')
        if hun is not None:  # P27 found in the item
            return hun

        words = self.description.lower().split()
        return 'magyar' in words or 'hungarian' in words

    def property_has_value(self,
                           p: str,
                           q: str,
                           rank=None  # Not implemented, TODO
                           ) -> Union[bool, None]:
        """Tell if the given p property has the given q value.

        E.g. property_has_value('P31', 'Q5') == True if it is a person.
        Code is partially derived from
        https://gerrit.wikimedia.org/r/c/pywikibot/core/+/888791/
        (C) Author of copied part: Ayush Anand33

        :param p: property ID, e.g. 'P31'
        :param q: item ID the property is expected to point to, e.g. 'Q5'
        :param rank: currently ignored (not implemented)
        :return: True if any claim of p has q as its target; False if the
            item has p but none of its claims points to q; None if the item
            does not have p at all
        """
        if p not in self.claims:
            return None
        for claim in self.claims[p]:
            try:
                if claim.getTarget().getID() == q:
                    return True
            # Claims may lack a usable target:
            # Hitler (Q352) has 6 citizenships, one of them w/o name
            # Q81219 has place of burial/nyughely (P119) w/o value
            except AttributeError:
                continue
        return False  # Has the given P in WD, but not the given Q.

    def properties(self) -> Iterable:
        """Yield the properties of the item with Hungarian or English name.

        Each yielded value is a (property ID, label) tuple; the label is ''
        if the property has neither a Hungarian nor an English one. Every
        property is loaded from the repository to read its labels.
        """
        for p in self.data['claims']:
            prop = pywikibot.PropertyPage(repo, p)
            yield p, self._getstr(prop.get()['labels'])

    def has_auth_ctrl(self) -> bool:
        """Tell if the item has any of IDs used in {{Nemzetközi katalógusok}}."""
        return any(p in self.claims for p in self._AUTH_CTRL_PROPS)


def p_q_generator(p: str,
                  q: str,
                  has_hu: str = 'yes',
                  hu: bool = True,
                  total: Union[int, None] = None,
                  ) -> Iterable:
    """Yield Wikidata items where p has value q.

    Candidates are the main-namespace backlinks of q; each one is loaded
    and kept only if its property p really has the value q.

    :param p: property ID, e.g. 'P31'
    :param q: item ID the property must point to, e.g. 'Q5'
    :param has_hu: if 'yes', yield only items that have a sitelink in
        huwiki; if 'no', yield only items that don't have one; with any
        other value yield all items
    :param hu: if true, yield directly the huwiki page rather than the
        Wikidata item. Works only with has_hu == 'yes'; ignored otherwise.
    :param total: passed on to the backlinks query, so it limits the number
        of candidates examined, not the number of results yielded
    """
    target_item = pywikibot.ItemPage(repo, q)
    for page in target_item.backlinks(namespaces=0, total=total):
        item = ItemPlus(page.title())
        # A backlink alone does not tell which property links to q.
        if not item.property_has_value(p, q):
            continue
        if has_hu == 'yes':
            if item.has_magyar:
                # Resolve the huwiki page only when it is actually wanted.
                yield item.hupage if hu else item
        elif has_hu == 'no':
            if not item.has_magyar:
                yield item
        else:
            yield item