我一直在成功使用此代碼來提取在PDF每頁中找到的第一張圖像。但是,由於某種未知的原因,它現在無法處理某些新的PDF。我使用了其他工具(Datalogics等),這些工具確實可以從這些新的PDF中提取圖像。但是,如果可以使用iTextSharp,我不想購買Datalogics或任何其他工具。誰能告訴我為什麼此代碼無法在PDF中找到圖像?已知:我的PDF每頁只有1張圖片,沒有其他內容。
using iTextSharp.text;
using iTextSharp.text.pdf;
...
public static void ExtractImagesFromPDF(string sourcePdf, string outputPath){
// NOTE: This will only get the first image it finds per page.
// NOTE(review): no break/return is visible in the loops below, so as shown every
// image XObject on every page is visited; the "first image only" behavior was
// presumably implemented in code that is missing from this excerpt -- confirm.
PdfReader pdf = new PdfReader(sourcePdf);
// A second handle on the same file; it is not referenced again in the visible lines.
RandomAccessFileOrArray raf = new iTextSharp.text.pdf.RandomAccessFileOrArray(sourcePdf);
// NOTE(review): the excerpt ends without a catch/finally for this try and without
// closing every brace opened above, so any cleanup of pdf/raf is not visible here.
try
{
// Page numbers run from 1 through NumberOfPages.
for (int pageNumber = 1; pageNumber <= pdf.NumberOfPages; pageNumber++)
{
// Resolve the page dictionary, then its /Resources, then the /XObject dictionary inside it.
PdfDictionary pg = pdf.GetPageN(pageNumber);
// NOTE(review): res is dereferenced on the next line without a null check.
PdfDictionary res = (PdfDictionary)PdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES));
PdfDictionary xobj = (PdfDictionary)PdfReader.GetPdfObject(res.Get(PdfName.XOBJECT));
if (xobj != null)
{
// Walk every named entry of the page's XObject dictionary.
foreach (PdfName name in xobj.Keys)
{
PdfObject obj = xobj.Get(name);
// Only entries stored as indirect references are examined; direct objects are skipped.
if (obj.IsIndirect())
{
// Resolve the reference to its dictionary and read its /Subtype.
PdfDictionary tg = (PdfDictionary)PdfReader.GetPdfObject(obj);
PdfName type = (PdfName)PdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE));
// Only XObjects whose subtype is /Image are processed; every other subtype is ignored.
// NOTE(review): there is no descent into non-image XObjects, so an image nested
// inside one (e.g. a form XObject carrying its own resources) would never be
// reached -- a plausible cause of the reported failure; confirm against a failing PDF.
if (PdfName.IMAGE.Equals(type))
{
// Object number of the reference (round-tripped through an invariant-culture string),
// used to fetch the same object again from the reader as a stream.
int XrefIndex = Convert.ToInt32(((PRIndirectReference)obj).Number.ToString(System.Globalization.CultureInfo.InvariantCulture));
PdfObject pdfObj = pdf.GetPdfObject(XrefIndex);
PdfStream pdfStrem = (PdfStream)pdfObj;
// Raw stream bytes -- presumably still encoded with the stream's /Filter; verify.
byte[] bytes = PdfReader.GetStreamBytesRaw((PRStream)pdfStrem);
// NOTE(review): this body is empty in the excerpt, so the bytes are discarded and
// outputPath is never used in the visible lines; the image-saving code appears
// to have been lost.
if ((bytes != null))
{
}
}
}
}
}
}
}
3 回答

阿晨1998
TA貢獻2037條經驗 獲得超6個贊
這是一個更簡單的解決方案:
// Wrap the image stream (imgPRStream) in iTextSharp's parser-level image object,
// then ask that object for a System.Drawing.Image.
var imageObject = new iTextSharp.text.pdf.parser.PdfImageObject(imgPRStream);
System.Drawing.Image img = imageObject.GetDrawingImage();

犯罪嫌疑人X
TA貢獻2080條經驗 獲得超4個贊
以下代碼合併了Dave和R Ubben的所有想法,並返回所有圖像的完整列表,還處理了多種位深度。為了配合我正在從事的項目,我不得不將其轉換為VB,對此感到抱歉...
''' <summary>
''' Collects the bytes of every image XObject reachable from <paramref name="dict"/>,
''' descending into XObjects whose subtype is /Form or /Group.
''' </summary>
''' <param name="dict">Dictionary whose /Resources entry is searched (a page, or a nested XObject on recursion).</param>
''' <param name="images">List that receives one byte array per image found.</param>
''' <param name="doc">Reader used to fetch each image stream by object number.</param>
''' <remarks>
''' Images whose /Filter is exactly /FlateDecode are inflated and re-encoded as PNG;
''' every other image is added as its raw stream bytes.
''' NOTE(review): PDF /BitsPerComponent is a per-channel value, so the "24" case below
''' would not match an ordinary 8-bit RGB image -- confirm the intended mapping.
''' </remarks>
''' <exception cref="NotSupportedException">A FlateDecode image has a /BitsPerComponent other than 1 or 24.</exception>
Private Sub getAllImages(ByVal dict As pdf.PdfDictionary, ByVal images As List(Of Byte()), ByVal doc As pdf.PdfReader)
    Dim res As pdf.PdfDictionary = CType(pdf.PdfReader.GetPdfObject(dict.Get(pdf.PdfName.RESOURCES)), pdf.PdfDictionary)
    ' A dictionary without /Resources has nothing to search.
    If res Is Nothing Then Return

    Dim xobj As pdf.PdfDictionary = CType(pdf.PdfReader.GetPdfObject(res.Get(pdf.PdfName.XOBJECT)), pdf.PdfDictionary)
    If xobj Is Nothing Then Return

    For Each name As pdf.PdfName In xobj.Keys
        Dim obj As pdf.PdfObject = xobj.Get(name)
        ' Only entries stored as indirect references are examined.
        If Not obj.IsIndirect() Then Continue For

        Dim tg As pdf.PdfDictionary = CType(pdf.PdfReader.GetPdfObject(obj), pdf.PdfDictionary)
        If tg Is Nothing Then Continue For
        Dim subtype As pdf.PdfName = CType(pdf.PdfReader.GetPdfObject(tg.Get(pdf.PdfName.SUBTYPE)), pdf.PdfName)

        If pdf.PdfName.IMAGE.Equals(subtype) Then
            Dim xrefIdx As Integer = CType(obj, pdf.PRIndirectReference).Number
            Dim pdfObj As pdf.PdfObject = doc.GetPdfObject(xrefIdx)
            Dim str As pdf.PdfStream = CType(pdfObj, pdf.PdfStream)
            Dim bytes As Byte() = pdf.PdfReader.GetStreamBytesRaw(CType(str, pdf.PRStream))

            ' /Filter may be absent; treat that as "no filter" instead of failing.
            Dim filterObj As pdf.PdfObject = tg.Get(pdf.PdfName.FILTER)
            Dim filter As String = String.Empty
            If filterObj IsNot Nothing Then filter = filterObj.ToString()

            If filter = "/FlateDecode" Then
                bytes = pdf.PdfReader.FlateDecode(bytes, True)

                ' Geometry is only needed (and only read) for images that are rebuilt as bitmaps.
                Dim invariant As System.Globalization.CultureInfo = System.Globalization.CultureInfo.InvariantCulture
                Dim width As Integer = Int32.Parse(tg.Get(pdf.PdfName.WIDTH).ToString(), invariant)
                Dim height As Integer = Int32.Parse(tg.Get(pdf.PdfName.HEIGHT).ToString(), invariant)
                Dim bpp As String = tg.Get(pdf.PdfName.BITSPERCOMPONENT).ToString()

                Dim pixelFormat As System.Drawing.Imaging.PixelFormat
                Dim rowBytes As Integer ' bytes per source row, rows padded to a whole byte
                Select Case Integer.Parse(bpp, invariant)
                    Case 1
                        pixelFormat = Drawing.Imaging.PixelFormat.Format1bppIndexed
                        rowBytes = (width + 7) \ 8
                    Case 24
                        pixelFormat = Drawing.Imaging.PixelFormat.Format24bppRgb
                        rowBytes = width * 3
                    Case Else
                        Throw New NotSupportedException("Unknown pixel format " + bpp)
                End Select

                Using bmp As New System.Drawing.Bitmap(width, height, pixelFormat)
                    Dim bmd As System.Drawing.Imaging.BitmapData = bmp.LockBits(New System.Drawing.Rectangle(0, 0, width, height), System.Drawing.Imaging.ImageLockMode.WriteOnly, pixelFormat)
                    Try
                        ' Copy row by row: the bitmap's stride can be wider than the source row,
                        ' so one bulk copy would skew rows or run past the locked region.
                        Dim rows As Integer = Math.Min(height, bytes.Length \ rowBytes)
                        For row As Integer = 0 To rows - 1
                            Dim dest As New IntPtr(bmd.Scan0.ToInt64() + CLng(row) * bmd.Stride)
                            Marshal.Copy(bytes, row * rowBytes, dest, rowBytes)
                        Next
                    Finally
                        bmp.UnlockBits(bmd)
                    End Try

                    Using ms As New MemoryStream
                        bmp.Save(ms, System.Drawing.Imaging.ImageFormat.Png)
                        ' ToArray returns exactly the bytes written; GetBuffer would also
                        ' include the unused tail of the stream's internal buffer.
                        bytes = ms.ToArray()
                    End Using
                End Using
            End If

            images.Add(bytes)
        ElseIf pdf.PdfName.FORM.Equals(subtype) OrElse pdf.PdfName.GROUP.Equals(subtype) Then
            ' Nested XObjects carry their own /Resources; search them the same way.
            getAllImages(tg, images, doc)
        End If
    Next
End Sub
- 3 回答
- 0 關注
- 1220 瀏覽
相關問題推薦
添加回答
舉報
0/150
提交
取消