PDF修改
0
最近遇到一个需要修改PDF内容的问题，但是发现中文支持非常糟糕，而且网络上的代码基本都是同一个例子，这个例子里有些被分段存储的字符串是不能替换的。
源代码看起来也让人头大，暂时先写了一点放在下面，以后有时间再优化一下看看吧。
使用版本
pdfbox-1.8.10
/**
 * Demo entry point: runs {@code replacePDFContentStr} on a fixed sample PDF,
 * asking it to replace a phone number and an e-mail address.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    final String sourcePdf = "e://test.pdf";
    // Passed as the varargs array; equivalent to listing the values inline.
    final String[] sensitiveValues = {"13888888888", "admin@acgist.com"};
    replacePDFContentStr(sourcePdf, sensitiveValues);
}
/**
 * Masks the given strings on the first page of a PDF with asterisks and saves
 * the result to {@code e://test-tmp.pdf} (the output path is fixed).
 *
 * <p>Only text held in array tokens ({@code COSArray}) of the page's content
 * stream is examined. The {@code COSString} pieces of each array are joined
 * before searching, so a value split across several pieces is still found.
 * An array that needs masking is collapsed into a single string, which
 * discards its non-string elements.
 *
 * <p>Matching is done on the text returned by {@code COSString.getString()}.
 * NOTE(review): text stored with a font-specific encoding (e.g. Chinese) may
 * not decode to the expected characters and therefore may not match - verify.
 *
 * <p>{@code IOException} and {@code COSVisitorException} are printed to
 * standard error and not rethrown.
 *
 * @param filePath path of the PDF to read
 * @param contents strings to mask; {@code null} and empty entries are ignored
 */
public static void replacePDFContentStr(String filePath, String ... contents) {
    String outFilePath = "e://test-tmp.pdf";
    PDDocument document = null;
    try {
        document = PDDocument.load(new File(filePath));
        PDPage page = (PDPage) document.getDocumentCatalog().getAllPages().get(0);
        PDStream stream = page.getContents();
        // A page without a content stream has nothing to mask; the document
        // is then saved unchanged instead of failing with a NullPointerException.
        if (stream != null) {
            PDFStreamParser parser = new PDFStreamParser(stream.getStream());
            parser.parse();
            List<?> tokens = parser.getTokens();
            for (Object token : tokens) {
                if (!(token instanceof COSArray)) {
                    continue;
                }
                COSArray cosArray = (COSArray) token;
                String txt = joinArrayText(cosArray);
                String masked = maskOccurrences(txt, contents);
                // Touch the array only when something was actually masked, so
                // unrelated text keeps its original pieces.
                if (!masked.equals(txt)) {
                    cosArray.clear();
                    cosArray.add(new COSString(masked));
                }
            }
            PDStream pdStream = new PDStream(document);
            // try-with-resources closes the stream even if writing the tokens fails.
            try (OutputStream outputStream = pdStream.createOutputStream()) {
                ContentStreamWriter contentStreamWriter = new ContentStreamWriter(outputStream);
                contentStreamWriter.writeTokens(tokens);
            }
            page.setContents(pdStream);
        }
        document.save(new File(outFilePath));
    } catch (IOException | COSVisitorException e) {
        e.printStackTrace();
    } finally {
        try {
            if (document != null) {
                document.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

/** Concatenates the text of every {@code COSString} element of the array, in order. */
private static String joinArrayText(COSArray cosArray) {
    StringBuilder text = new StringBuilder();
    for (int j = 0; j < cosArray.size(); j++) {
        Object element = cosArray.get(j);
        if (element instanceof COSString) {
            text.append(((COSString) element).getString());
        }
    }
    return text.toString();
}

/**
 * Returns {@code text} with every occurrence of each entry of {@code contents}
 * replaced by the same number of '*' characters. Masks are applied one after
 * another to the running result, so several entries found in the same text
 * all stay masked.
 */
private static String maskOccurrences(String text, String[] contents) {
    if (contents == null) {
        return text;
    }
    String masked = text;
    for (String content : contents) {
        if (content == null || content.isEmpty() || !masked.contains(content)) {
            continue;
        }
        StringBuilder stars = new StringBuilder(content.length());
        for (int j = 0; j < content.length(); j++) {
            stars.append('*');
        }
        masked = masked.replace(content, stars);
    }
    return masked;
}