Parser de WP a Pmwiki

Descripción

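Pequeño programa que lee el fichero XML de exportación de un WordPress y genera, por cada entrada, un fichero de página listo para importar en PmWiki, además de una página índice.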

Código

  1. import feedparser
  2. import os
  3. import re
  4. from unidecode import unidecode
  5. from datetime import datetime
  6.  
  7.  
  8. def depurar(html_string):
  9.  
  10.  
  11. #--[ Limpia <a href    
  12. #--[ Limpia <a href    
  13.     pattern = r'<a\s+href="([^"]+)"[^>]*>(.*?)</a>'
  14.     mivar = re.sub(pattern, r'[[\1|\2]]', html_string)
  15.     mivar = mivar.replace('http://gtd.bergonzini.com/wp-content/uploads/','Path:/uploads/GTD/')
  16.  
  17. #--[ Limpia <img    
  18.     pattern = r'<img[^>]*src=["\'](.*?)["\'][^>]*>'
  19.     mivar = re.sub(pattern, r" %width=50pct%\1", mivar)  
  20.  
  21. #-- Eliminamos EM
  22.     pattern = r'<em>(.*?)</em>'
  23.     mivar = re.sub(pattern, r"''\1''", mivar)  
  24.     pattern = r'<i>(.*?)</i>'
  25.     mivar = re.sub(pattern, r"''\1''", mivar)  
  26.  
  27. #-- Eliminamos H3
  28.     pattern = r'<h3(.*?)>(.*?)</h3>'
  29.     mivar = re.sub(pattern, r"!!!\1 ", mivar)    
  30.  
  31.     pattern = r'<h2(.*?)>(.*?)</h2>'
  32.     mivar = re.sub(pattern, r"!!\1 ", mivar)    
  33.  
  34.     pattern = r'<h1(.*?)>(.*?)</h1>'
  35.     mivar = re.sub(pattern, r"!\2 ", mivar)    
  36.  
  37.     mivar = mivar.replace("</p>",r'[[<<]]')
  38.     mivar = mivar.replace("<p>",r'[[<<]]')    
  39.     mivar = mivar.replace(r'<ul>',r'[[<<]]--->')        
  40.     mivar = mivar.replace(r'</ul>',r'[[<<]]')            
  41.     mivar = mivar.replace(r'<div class="wp-block-group alignwide">',"")        
  42.     mivar = mivar.replace('<p align="justify">',"")
  43.     mivar = mivar.replace(r'<!-- wp:paragraph -->',"")
  44.  
  45.  
  46.     return mivar
  47.  
  48. feed = feedparser.parse(r'file.xml')
  49. feed_entries = feed.entries
  50. indice = open('Site.GTDSite','w',encoding="UTF-8")
  51. indice.write('version=pmwiki-2.4.2 ordered=1 urlencoded=1'+os.linesep)
  52. indice.write('agent=Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:135.0) Gecko/20100101 Firefox/135.0'+os.linesep)
  53. indice.write('name=Site.GTD'+os.linesep)
  54. linea = ""      
  55.  
  56. for post in feed_entries:
  57.     especiales = "=)(/ºÂª\\:|’¿?!¡'·#~%&;.,{([])}+*^`+<>"
  58.     ax = post.title.replace( " ", "")
  59.     for ii in especiales:
  60.             ax = ax.replace(ii,"")
  61.     ax = ax.replace('"',"")
  62.     ax = 'GTD.'+unidecode(ax)
  63.     if post.content[0]['value'] != "":      
  64.         mifile = open(ax,'w',encoding="UTF-8")
  65.         mifile.write('version=pmwiki-2.4.2 ordered=1 urlencoded=1'+os.linesep)
  66.         mifile.write('agent=Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:135.0) Gecko/20100101 Firefox/135.0'+os.linesep)
  67.         mifile.write('name='+ax+os.linesep)        
  68.         # mifile.write(post.title+os.linesep)
  69.         mitime = datetime.strptime(post.published, '%a, %d %b %Y %H:%M:%S %z').strftime('%Y%m%d')
  70.         #mifile.write(mitime+os.linesep )
  71.         cambios = '(:title '+post.title+':)(:description '+post.title+' :)'+ depurar(post.content[0]['value'])+'[[<<]] Publicado el '+mitime
  72.         cambios = cambios.replace(os.linesep, '' )
  73.         cambios = cambios.replace('%0a', '' )
  74.         cambios = cambios.replace('%0d', '' )    
  75.         mifile.write('text='+cambios+os.linesep )
  76.         linea = linea + ' '+ mitime + ' [[' + ax + ' | '+ post.title + ']]'
  77.         mifile.close()
  78.  
  79. indice.write('text='+linea)        
  80. indice.close()        

Python : PyDev : py Programación : wordpress : pmwiki

Última modificación de la página el 05 May 2026 a las 15h11
Powered by PmWiki