Here's the fastest way to determine if a file is a PDF or not.
To determine whether a file is in PDF format, we need only read the first 4 bytes of the file; if they match '%PDF', the file is a PDF file. This runs on average in about 400 ticks, or 0.04 milliseconds.
Here's a sample PDF document opened in Frhed.
The first few bytes of a file are called the file's header, and each file type has its own signature. You can determine a file's type by comparing its header against the known file signatures. A free utility for this is available here.
Here's the C# code
using System;
using System.IO;
using System.Diagnostics;

/// <summary>
/// Console demo that checks whether a file starts with the 4-byte PDF
/// signature ("%PDF") and reports how many Stopwatch ticks the check took.
/// </summary>
public class Program
{
    // ASCII bytes of "%PDF" - the signature expected at the very start of the file.
    private static readonly byte[] PdfSignature = { 0x25, 0x50, 0x44, 0x46 };

    /// <summary>
    /// Opens the hard-coded sample file, tests its first bytes against the PDF
    /// signature, prints the result together with the elapsed ticks, and then
    /// waits for a key press.
    /// </summary>
    public static void Main()
    {
        Stopwatch sw = new Stopwatch();
        //string sourceFile = @"D:\CMarkusDownloads\88224835-Ataatapi-Command-Set-2-Acs-2-Rev4.pdf"; //~400 ticks
        string sourceFile = @"D:\CMarkusDownloads\grepWin-2.0.0.msi"; //~400 ticks

        sw.Start();
        try
        {
            // 'using' disposes the stream on every path. If the constructor itself
            // throws (missing file, access denied) there is nothing to dispose, so
            // no null reference is ever dereferenced during cleanup.
            using (FileStream fr = new FileStream(sourceFile, FileMode.Open, FileAccess.Read))
            {
                bool isPdf = StartsWithPdfSignature(fr);

                // Stop before writing to the console so output cost is not timed.
                sw.Stop();

                if (isPdf)
                {
                    // It's most probably a PDF file
                    Console.WriteLine(sourceFile + " is a PDF file in " + sw.ElapsedTicks + " ticks.");
                }
                else
                {
                    Console.WriteLine(sourceFile + " is a not PDF file in " + sw.ElapsedTicks + " ticks.");
                }
            }
        }
        catch (Exception ex)
        {
            // Top-level handler: report any failure (e.g. file not found) to the user.
            sw.Stop();
            Console.WriteLine(ex.Message + " in " + sw.ElapsedTicks + " ticks.");
        }

        Console.ReadKey();
    }

    /// <summary>
    /// Reads the leading bytes of <paramref name="stream"/> and compares them
    /// with the PDF signature.
    /// </summary>
    /// <param name="stream">Readable stream positioned at the start of the file.</param>
    /// <returns>
    /// <c>true</c> when the stream yields at least as many bytes as the signature
    /// and they all match; otherwise <c>false</c>.
    /// </returns>
    private static bool StartsWithPdfSignature(Stream stream)
    {
        byte[] buffer = new byte[PdfSignature.Length];
        int total = 0;

        // Stream.Read may return fewer bytes than requested, so keep reading
        // until the buffer is full or the end of the stream is reached.
        while (total < buffer.Length)
        {
            int read = stream.Read(buffer, total, buffer.Length - total);
            if (read == 0)
            {
                return false; // file is shorter than the signature
            }
            total += read;
        }

        for (int i = 0; i < PdfSignature.Length; i++)
        {
            if (buffer[i] != PdfSignature[i])
            {
                return false;
            }
        }
        return true;
    }
}
No comments:
Post a Comment