XWiki不愧是最适合工程师用的企业内部知识管理系统:导入MediaWiki内容到XWiki
Groovy script import org.dom4j.io.SAXReader import org.dom4j.* import groovy.net.xmlrpc.* import java.net.ServerSocket import org.wikimodel.wem.mediawiki.MediaWikiParser import org.wikimodel.wem.xwiki.* class Pruni…
Groovy script
import org.dom4j.io.SAXReader
import org.dom4j.*
import groovy.net.xmlrpc.*
import java.net.ServerSocket
import org.wikimodel.wem.mediawiki.MediaWikiParser
import org.wikimodel.wem.xwiki.*
/**
 * dom4j ElementHandler that processes one page element at a time while the
 * dump is being parsed, pushes the page to the remote wiki through the
 * XML-RPC proxy, and then detaches the element so the in-memory tree does
 * not grow with the size of the dump.
 *
 * proxy - XML-RPC proxy exposing confluence1.storePage
 * token - session token passed as the first argument of storePage
 */
class PruningPageHandler implements ElementHandler {
    def proxy, token;
    // Number of page elements seen so far (incremented for every page, stored or not).
    def counter = 0;
    // Pages are only stored while counter < max.
    def max = 10000;

    PruningPageHandler(proxy, token) {
        this.proxy = proxy
        this.token = token
    }

    def messages = []

    /** Nothing to do when a page element opens; all work happens in onEnd. */
    public void onStart(ElementPath path) { }

    /**
     * Called when a page element has been completely read. Reads the title,
     * the revision text and the contributor username, converts the text with
     * MediaWikiParser/XWikiSerializer, stores the result in the 'Wikipedia'
     * space, and finally detaches the page element.
     *
     * Pages whose first 30 characters contain "redirect" (case-insensitive)
     * are skipped, as are all pages once the counter has reached max.
     */
    public void onEnd(ElementPath path) {
        def page = path.current
        def title = page.elementText('title')
        title = title.replaceAll(' ', '_')
        println(title + '(' + counter + ')')

        def revision = page.element('revision')
        // Null-safe: a page without revision/text/contributor must not abort the whole import.
        def revtext = revision?.elementText('text') ?: ''
        def contributor = revision?.element('contributor')
        def username = contributor?.elementText('username')

        def head = revtext.substring(0, Math.min(30, revtext.length())).toLowerCase()
        def index = head.indexOf("redirect")
        counter++;

        if (counter < max && index < 0) {
            revtext = revtext.replaceFirst("^-", "*");
            revtext = revtext.replaceAll("__", "")
            revtext = revtext.replaceAll("[\\|][\\+]", "")

            // Start from the cleaned-up raw text so it is what gets stored if conversion fails.
            def buffer = new StringBuffer()
            buffer.append(revtext)
            try {
                def reader = new StringReader(revtext);
                def parser = new MediaWikiParser();
                // Serialize into a separate buffer and adopt it only on success;
                // otherwise a failed parse would leave empty or partial content.
                def converted = new StringBuffer()
                def listener = new XWikiSerializer(converted);
                parser.parse(reader, listener);
                buffer = converted
            } catch (Exception e) {
                println(e.getMessage())
            }

            def map = new HashMap()
            map.put('content', buffer.toString())
            map.put('modifier', username)
            map.put('space', 'Wikipedia')
            map.put('title', title)
            try {
                proxy.confluence1.storePage(token, map)
            } catch (Exception e) {
                // Best effort: report the failure and continue with the next page.
                println(e.getMessage())
            }
        }
        page.detach() // prune the tree
    }
}
// Driver: log in to the XML-RPC endpoint, register the page handler on the
// SAX reader, and stream the dump file through it.
def proxy = new XMLRPCServerProxy("http://xwikiserver/xwiki/xmlrpc/confluence")
def token = proxy.confluence1.login("","")
def reader = new SAXReader()
def handler = new PruningPageHandler(proxy, token)
File f = new File("/home/slauriere/enwiki-20070908-pages-articles.xml.bz2.1.out")
// The handler fires for every element matching this path while the file is read.
reader.addHandler('/mediawiki/page', handler)
reader.setEncoding('UTF-8')
// withInputStream closes the stream when the closure returns or throws,
// so the file handle is not leaked if parsing fails part-way through.
f.withInputStream { InputStream fis ->
    reader.read(fis)
}


魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。
更多推荐

所有评论(0)