欧美成人午夜免费全部完,亚洲午夜福利精品久久,а√最新版在线天堂,另类亚洲综合区图片小说区,亚洲欧美日韩精品色xxx

千鋒扣丁學(xué)堂Java培訓(xùn)之實(shí)現(xiàn)查找PDF關(guān)鍵字所在頁(yè)碼及其坐標(biāo)

2019-09-05 14:44:32 1203瀏覽

今天千鋒扣丁學(xué)Java培訓(xùn)老師給大家分享一篇關(guān)于java實(shí)現(xiàn)查找PDF關(guān)鍵字所在頁(yè)碼及其坐標(biāo)的詳細(xì)介紹,本文通過(guò)實(shí)例代碼給大家介紹的非常詳細(xì),因?yàn)樽罱羞@方面的需求,此功能跟PDF中Ctrl+F性質(zhì)一樣,如果PDF中為圖片形式的不支持定位到關(guān)鍵字。



import com.itextpdf.awt.geom.Rectangle2D.Float;
import com.itextpdf.text.pdf.PdfDictionary;
import com.itextpdf.text.pdf.PdfName;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.*;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * 消失的太陽(yáng)
 */
public class MyTest {
 public static void main(String[] args) throws IOException {
  //1.給定文件
  File pdfFile = new File("D://test.pdf");
  //2.定義一個(gè)byte數(shù)組,長(zhǎng)度為文件的長(zhǎng)度
  byte[] pdfData = new byte[(int) pdfFile.length()];
  //3.IO流讀取文件內(nèi)容到byte數(shù)組
  FileInputStream inputStream = null;
  try {
   inputStream = new FileInputStream(pdfFile);
   inputStream.read(pdfData);
  } catch (IOException e) {
   throw e;
  } finally {
   if (inputStream != null) {
    try {
     inputStream.close();
    } catch (IOException e) {
    }
   }
  }
  //4.指定關(guān)鍵字
  String keyword = "消失的太陽(yáng):";
  //5.調(diào)用方法,給定關(guān)鍵字和文件
  List<float[]> positions = findKeywordPostions(pdfData, keyword);
  //6.返回值類型是 List<float[]> 每個(gè)list元素代表一個(gè)匹配的位置,分別為 float[0]所在頁(yè)碼 float[1]所在x軸 float[2]所在y軸
  System.out.println("total:" + positions.size());
  if (positions != null && positions.size() > 0) {
   for (float[] position : positions) {
    System.out.print("pageNum: " + (int) position[0]);
    System.out.print("\tx: " + position[1]);
    System.out.println("\ty: " + position[2]);
   }
  }
 }
 /**
  * findKeywordPostions
  * @param pdfData  通過(guò)IO流 PDF文件轉(zhuǎn)化的byte數(shù)組
  * @param keyword  關(guān)鍵字
  * @return List<float [ ]> : float[0]:pageNum float[1]:x float[2]:y
  * @throws IOException
  */
 public static List<float[]> findKeywordPostions(byte[] pdfData, String keyword) throws IOException {
  List<float[]> result = new ArrayList<>();
  List<PdfPageContentPositions> pdfPageContentPositions = getPdfContentPostionsList(pdfData);
  for (PdfPageContentPositions pdfPageContentPosition : pdfPageContentPositions) {
   List<float[]> charPositions = findPositions(keyword, pdfPageContentPosition);
   if (charPositions == null || charPositions.size() < 1) {
    continue;
   }
   result.addAll(charPositions);
  }
  return result;
 }
 private static List<PdfPageContentPositions> getPdfContentPostionsList(byte[] pdfData) throws IOException {
  PdfReader reader = new PdfReader(pdfData);
  List<PdfPageContentPositions> result = new ArrayList<>();
  int pages = reader.getNumberOfPages();
  for (int pageNum = 1; pageNum <= pages; pageNum++) {
   float width = reader.getPageSize(pageNum).getWidth();
   float height = reader.getPageSize(pageNum).getHeight();
   PdfRenderListener pdfRenderListener = new PdfRenderListener(pageNum, width, height);
   //解析pdf,定位位置
   PdfContentStreamProcessor processor = new PdfContentStreamProcessor(pdfRenderListener);
   PdfDictionary pageDic = reader.getPageN(pageNum);
   PdfDictionary resourcesDic = pageDic.getAsDict(PdfName.RESOURCES);
   try {
    processor.processContent(ContentByteUtils.getContentBytesForPage(reader, pageNum), resourcesDic);
   } catch (IOException e) {
    reader.close();
    throw e;
   }
   String content = pdfRenderListener.getContent();
   List<CharPosition> charPositions = pdfRenderListener.getcharPositions();
   List<float[]> positionsList = new ArrayList<>();
   for (CharPosition charPosition : charPositions) {
    float[] positions = new float[]{charPosition.getPageNum(), charPosition.getX(), charPosition.getY()};
    positionsList.add(positions);
   }
   PdfPageContentPositions pdfPageContentPositions = new PdfPageContentPositions();
   pdfPageContentPositions.setContent(content);
   pdfPageContentPositions.setPostions(positionsList);
   result.add(pdfPageContentPositions);
  }
  reader.close();
  return result;
 }
 private static List<float[]> findPositions(String keyword, PdfPageContentPositions pdfPageContentPositions) {
  List<float[]> result = new ArrayList<>();
  String content = pdfPageContentPositions.getContent();
  List<float[]> charPositions = pdfPageContentPositions.getPositions();
  for (int pos = 0; pos < content.length(); ) {
   int positionIndex = content.indexOf(keyword, pos);
   if (positionIndex == -1) {
    break;
   }
   float[] postions = charPositions.get(positionIndex);
   result.add(postions);
   pos = positionIndex + 1;
  }
  return result;
 }
 private static class PdfPageContentPositions {
  private String content;
  private List<float[]> positions;
  public String getContent() {
   return content;
  }
  public void setContent(String content) {
   this.content = content;
  }
  public List<float[]> getPositions() {
   return positions;
  }
  public void setPostions(List<float[]> positions) {
   this.positions = positions;
  }
 }
 private static class PdfRenderListener implements RenderListener {
  private int pageNum;
  private float pageWidth;
  private float pageHeight;
  private StringBuilder contentBuilder = new StringBuilder();
  private List<CharPosition> charPositions = new ArrayList<>();
  public PdfRenderListener(int pageNum, float pageWidth, float pageHeight) {
   this.pageNum = pageNum;
   this.pageWidth = pageWidth;
   this.pageHeight = pageHeight;
  }
  public void beginTextBlock() {
  }
  public void renderText(TextRenderInfo renderInfo) {
   List<TextRenderInfo> characterRenderInfos = renderInfo.getCharacterRenderInfos();
   for (TextRenderInfo textRenderInfo : characterRenderInfos) {
    String word = textRenderInfo.getText();
    if (word.length() > 1) {
     word = word.substring(word.length() - 1, word.length());
    }
    Float rectangle = textRenderInfo.getAscentLine().getBoundingRectange();
    float x = (float)rectangle.getX();
    float y = (float)rectangle.getY();
//    float x = (float)rectangle.getCenterX();
//    float y = (float)rectangle.getCenterY();
//    double x = rectangle.getMinX();
//    double y = rectangle.getMaxY();
    //這兩個(gè)是關(guān)鍵字在所在頁(yè)面的XY軸的百分比
    float xPercent = Math.round(x / pageWidth * 10000) / 10000f;
    float yPercent = Math.round((1 - y / pageHeight) * 10000) / 10000f;
//    CharPosition charPosition = new CharPosition(pageNum, xPercent, yPercent);
    CharPosition charPosition = new CharPosition(pageNum, (float)x, (float)y);
    charPositions.add(charPosition);
    contentBuilder.append(word);
   }
  }
  public void endTextBlock() {
  }
  public void renderImage(ImageRenderInfo renderInfo) {
  }
  public String getContent() {
   return contentBuilder.toString();
  }
  public List<CharPosition> getcharPositions() {
   return charPositions;
  }
 }
 private static class CharPosition {
  private int pageNum = 0;
  private float x = 0;
  private float y = 0;
  public CharPosition(int pageNum, float x, float y) {
   this.pageNum = pageNum;
   this.x = x;
   this.y = y;
  }
  public int getPageNum() {
   return pageNum;
  }
  public float getX() {
   return x;
  }
  public float getY() {
   return y;
  }
  @Override
  public String toString() {
   return "[pageNum=" + this.pageNum + ",x=" + this.x + ",y=" + this.y + "]";
  }
 }
}

以上就是關(guān)于千鋒扣丁學(xué)堂Java培訓(xùn)之實(shí)現(xiàn)查找PDF關(guān)鍵字所在頁(yè)碼及其坐標(biāo)的全部?jī)?nèi)容,希望本文所述對(duì)大家java程序設(shè)計(jì)有所幫助,想要學(xué)好Java開(kāi)發(fā)小編給大家推薦口碑良好的扣丁學(xué)堂,扣丁學(xué)堂有專業(yè)老師制定的Java學(xué)習(xí)路線圖輔助學(xué)員學(xué)習(xí),此外還有與時(shí)俱進(jìn)的Java課程體系和Java視頻教程供大家學(xué)習(xí),想要學(xué)好Java開(kāi)發(fā)技術(shù)的小伙伴快快行動(dòng)吧??鄱W(xué)堂Java技術(shù)交流群:850353792。


                          JavaEE/微服務(wù)/源碼解析/分布式/企業(yè)級(jí)架構(gòu)【VIP體驗(yàn)課】


     【關(guān)注微信公眾號(hào)獲取更多學(xué)習(xí)資料】        【掃碼進(jìn)入JavaEE/微服務(wù)VIP免費(fèi)公開(kāi)課】  



查看更多關(guān)于“Java開(kāi)發(fā)資訊”的相關(guān)文章>>

標(biāo)簽: Java培訓(xùn) Java視頻教程 Java多線程 Java面試題 Java學(xué)習(xí)視頻 springBoot項(xiàng)目

熱門專區(qū)

暫無(wú)熱門資訊

課程推薦

微信
微博
15311698296

全國(guó)免費(fèi)咨詢熱線

郵箱:codingke@1000phone.com

官方群:148715490

北京千鋒互聯(lián)科技有限公司版權(quán)所有   北京市海淀區(qū)寶盛北里西區(qū)28號(hào)中關(guān)村智誠(chéng)科創(chuàng)大廈4層
京ICP備2021002079號(hào)-2   Copyright ? 2017 - 2022
返回頂部 返回頂部