Difference between revisions 14130319 and 14143077 on frwiktionary[[Catégorie:JackBot]] <source lang=python> #!/usr/bin/env python # coding: utf-8 # Ce script formate les pages du Wiktionnaire, tous les jours après minuit depuis le Toolserver : # 1) Retire certains doublons. # 2) Ajoute les clés de tris, prononciations vides, et certains liens vers les conjugaisons. (contracted; show full) summary = summary + u', {{clé de tri}} inutile' if PageTemp.find(u'{{clé de tri') == -1 and PageTemp.find(u'{{clef de tri') == -1: ClePage = CleDeTri(PageHS) if ClePage != u'' and ClePage != PageHS and ClePage.lower() != PageHS.lower(): '''if PageTemp.find(u'[[Catégorie:') != -1: PageTemp = PageTemp[0:PageTemp.find(u'[[Catégorie:')] + u'\n{{clé de tri|' + ClePage + u'}}\n' + PageTemp[PageTemp.find(u'[[Catégorie:'):len(PageTemp)] elif PageTemp.find(u'[[Category:') != -1: (contracted; show full) ClePage = PageTemp2[0:PageTemp2.find(u'}}')] if CleDeTri(PageHS) != ClePage and (PageTemp.find(u'{{langue|fr}}') != -1 or PageTemp.find(u'{{langue|eo}}') != -1 or PageTemp.find(u'{{langue|en}}') != -1 or PageTemp.find(u'{{langue|es}}') != -1 or PageTemp.find(u'{{langue|de}}') != -1 or PageTemp.find(u'{{langue|pt}}') != -1 or PageTemp.find(u'{{langue|it}}') != -1): #print CleDeTri(PageHS) #print ClePage summary = summary + u', {{clé de tri}} corrigée' PageTemp = PageTemp[0:PageTemp.find(u'{{clé de tri|')+len(u'{{clé de tri|')] + CleDeTri(PageHS) + PageTemp[PageTemp.find(u'{{clé de tri|')+len(u'{{clé de tri|')+PageTemp2.find(u'}}'):len(PageTemp)] # Remplacements consensuels while PageTemp.find(u' ') != -1: (contracted; show full) else: PageEnd = PageEnd + PageTemp PageTemp = u'' #print(PageEnd.encode(config.console_encoding, 'replace')) #print(PageTemp.encode(config.console_encoding, 'replace')) PageTemp = PageEnd + PageTemp PageEnd = u''""" # Ajout des anagrammes francophones (prévoir si {{lien|}} pour les autres) if socket.gethostname() != '"willow'" and socket.gethostname() != 'aberge'"yarrow" and socket.gethostname() != "nightshade": if PageTemp.find(u'{{-anagr-}}') == -1 and PageHS.find(u' ') == -1 and PageTemp.find(u'{{langue|fr}}') != -1 and len(PageHS) < 7: # sinon trop long ( > 1 h par page) print socket.gethostname() anagrammes = anagram(PageHS) ListeAnagrammes = u'' for anagramme in anagrammes: if anagramme != PageHS: pageAnagr = Page(site,anagramme) (contracted; show full) while PageTemp.find(u'ε') != -1 and PageTemp.find(u'ε') < PageTemp.find(u'}}'): PageTemp = PageTemp[0:PageTemp.find(u'ε')] + u'ɛ' + PageTemp[PageTemp.find(u'ε')+1:len(PageTemp)] while PageTemp.find(u'ε̃') != -1 and PageTemp.find(u'ε̃') < PageTemp.find(u'}}'): PageTemp = PageTemp[0:PageTemp.find(u'ε̃')] + u'ɛ̃' + PageTemp[PageTemp.find(u'ε̃')+1:len(PageTemp)] while PageTemp.find(u':') != -1 and PageTemp.find(u':') < PageTemp.find(u'}}'): PageTemp = PageTemp[0:PageTemp.find(u':')] + u'ː' + PageTemp[PageTemp.find(u':')+1:len(PageTemp)] while PageTemp.find(u'g') != -1 and PageTemp.find(u'g') < PageTemp.find(u'}}'): PageTemp = PageTemp[0:PageTemp.find(u'g')] + u'ɡ' + PageTemp[PageTemp.find(u'g')+1:len(PageTemp)]⏎ if PageTemp[0:8] == u'pron||}}': PageEnd = PageEnd + PageTemp[0:PageTemp.find("}}")] + codelangue + "}}" PageTemp = PageTemp[PageTemp.find("}}")+2:len(PageTemp)] break elif PageTemp[position:position+3] == u'|}}' or PageTemp[position:position+4] == u'| }}': PageEnd = PageEnd + PageTemp[0:position] + "||" + codelangue + "}}" (contracted; show full)tre:lettre+1] == u'Ǖ' or PageTitre[lettre:lettre+1] == u'Ǘ' or PageTitre[lettre:lettre+1] == u'Ǚ' or PageTitre[lettre:lettre+1] == u'Ǜ' or PageTitre[lettre:lettre+1] == u'Ȕ' or PageTitre[lettre:lettre+1] == u'Ȗ' or PageTitre[lettre:lettre+1] == u'Ʉ' or PageTitre[lettre:lettre+1] == u'ủ' or PageTitre[lettre:lettre+1] == u'Ủ' or PageTitre[lettre:lettre+1] == u'ú' or PageTitre[lettre:lettre+1] == u'Ú' or PageTitre[lettre:lettre+1] == u'ù' or PageTitre[lettre:lettre+1] == u'Ù': PageT = PageT + "u" key = "yes" elif PageTitre[lettre:lettre+1] == u'ʋ' or PageTitre[lettre:lettre+1] == u'Ʋ' or PageTitre[lettre:lettre+1] == u'Ʌ' or PageTitre[lettre:lettre+1] == u'ʌ': PageT = PageT + "v" key = "yes" elif PageTitre[lettre:lettre+1] == u'ŵ' or PageTitre[lettre:lettre+1] == u'Ŵ': PageT = PageT + "w" (contracted; show full)TraitementRecherche = crawlerSearch(u'chinois') TraitementUtilisateur = crawlerUser(u'Utilisateur:JackBot') TraitementRedirections = crawlerRedirects() TraitementTout = crawlerAll(u'') while 1: TraitementRC = crawlerRC() ''' </source> All content in the above text box is licensed under the Creative Commons Attribution-ShareAlike license Version 4 and was originally sourced from https://fr.wiktionary.org/w/index.php?diff=prev&oldid=14143077.
![]() ![]() This site is not affiliated with or endorsed in any way by the Wikimedia Foundation or any of its affiliates. In fact, we fucking despise them.
|