Here's the fastest way to determine if a file is a PDF or not.
To determine whether a file is in PDF format, we need only read the first 4 bytes of the file; if they match '%PDF', the file is identified as a PDF. This runs in about 400 ticks on average, or 0.04 milliseconds.
The first few bytes of a file are called the header of the file, and each file type has its own signature. You can determine a file's type by comparing its header against the corresponding file signature. A free utility is available here.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Diagnostics;

namespace PDFirst4Bytes
{
    /// <summary>
    /// Console demo that decides whether a file is a PDF by comparing its first
    /// four bytes with the ASCII signature "%PDF", and reports how many
    /// Stopwatch ticks the check took.
    /// </summary>
    class Program
    {
        /// <summary>ASCII bytes of "%PDF", the signature expected at offset 0.</summary>
        private static readonly byte[] PdfSignature = { 0x25, 0x50, 0x44, 0x46 };

        /// <summary>File checked when no path is supplied on the command line.</summary>
        //private const string DefaultSourceFile = @"D:\CMarkusDownloads\88224835-Ataatapi-Command-Set-2-Acs-2-Rev4.pdf"; //~400 ticks
        private const string DefaultSourceFile = @"D:\CMarkusDownloads\grepWin-2.0.0.msi"; //~400 ticks

        /// <summary>
        /// Times the signature check for one file and prints the outcome.
        /// </summary>
        /// <param name="args">
        /// Optional: <c>args[0]</c> is the path of the file to check. When it is
        /// absent, <see cref="DefaultSourceFile"/> is used.
        /// </param>
        static void Main(string[] args)
        {
            string sourceFile = (args != null && args.Length > 0) ? args[0] : DefaultSourceFile;
            Stopwatch sw = new Stopwatch();

            // The measured interval covers opening the file, reading the header and comparing it.
            sw.Start();
            try
            {
                bool isPdf = HasPdfSignature(sourceFile);
                sw.Stop();

                if (isPdf)
                {
                    // A matching signature means the file is most probably a PDF.
                    Console.WriteLine(sourceFile + " is a PDF file in " + sw.ElapsedTicks + " ticks.");
                }
                else
                {
                    Console.WriteLine(sourceFile + " is not a PDF file in " + sw.ElapsedTicks + " ticks.");
                }
            }
            catch (Exception ex)
            {
                // Top-level handler: report any failure (missing file, access denied, ...) to the user.
                sw.Stop();
                Console.WriteLine(ex.Message + " in " + sw.ElapsedTicks + " ticks.");
            }

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }

        /// <summary>
        /// Reads the leading bytes of a file and compares them with <see cref="PdfSignature"/>.
        /// </summary>
        /// <param name="path">Path of the file to inspect.</param>
        /// <returns>
        /// <c>true</c> when the file holds at least four bytes and they equal "%PDF";
        /// otherwise <c>false</c>.
        /// </returns>
        /// <remarks>
        /// Exceptions thrown while opening or reading the file propagate to the caller.
        /// </remarks>
        private static bool HasPdfSignature(string path)
        {
            byte[] header = new byte[PdfSignature.Length];
            int totalRead = 0;

            using (var stream = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                // Stream.Read may return fewer bytes than requested even before end of
                // file, so keep reading until the header is full or the file ends.
                while (totalRead < header.Length)
                {
                    int read = stream.Read(header, totalRead, header.Length - totalRead);
                    if (read == 0)
                    {
                        break; // End of file: the file is shorter than the signature.
                    }

                    totalRead += read;
                }
            }

            if (totalRead != header.Length)
            {
                return false;
            }

            for (int i = 0; i < PdfSignature.Length; i++)
            {
                if (header[i] != PdfSignature[i])
                {
                    return false;
                }
            }

            return true;
        }
    }
}
No comments:
Post a Comment