Tuesday, May 28, 2024

C# dotNet The fastest way to check if a file is a PDF file or not


Here's the fastest way to determine if a file is a PDF or not. 

To determine whether a file is in PDF format, we only need to read the first 4 bytes of the file: if they match '%PDF', the file is most probably a PDF. This runs in about 400 ticks, or 0.04 milliseconds, on average.

The first few bytes of a file are called the file's header, and each file type has its own signature. You can determine a file's type by comparing its header against the corresponding file signature. A free utility for this is available here.


using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Diagnostics; 

namespace PDFirst4Bytes
{
    /// <summary>
    /// Console demo that times how long it takes to decide whether a file begins with the
    /// PDF signature "%PDF".
    /// </summary>
    class Program
    {
        /// <summary>ASCII bytes of "%PDF", the signature expected at offset 0 of the file.</summary>
        private static readonly byte[] PdfSignature = { 0x25, 0x50, 0x44, 0x46 };

        /// <summary>File that is checked when no path is given on the command line.</summary>
        private const string DefaultSourceFile = @"D:\CMarkusDownloads\grepWin-2.0.0.msi";

        /// <summary>
        /// Checks one file for the PDF signature, prints the verdict together with the elapsed
        /// Stopwatch ticks, and then waits for a key press.
        /// </summary>
        /// <param name="args">
        /// Optional. When present, <c>args[0]</c> is the path of the file to check; otherwise
        /// <see cref="DefaultSourceFile"/> is used.
        /// </param>
        static void Main(string[] args)
        {
            string sourceFile = (args != null && args.Length > 0) ? args[0] : DefaultSourceFile;

            Stopwatch sw = Stopwatch.StartNew();
            try
            {
                bool isPdf = HasPdfSignature(sourceFile);

                // Stop before writing to the console so the output cost is not part of the timing.
                sw.Stop();
                string verdict = isPdf ? " is a PDF file in " : " is not a PDF file in ";
                Console.WriteLine(sourceFile + verdict + sw.ElapsedTicks + " ticks.");
            }
            catch (Exception ex)
            {
                // Any failure (missing file, access denied, ...) is reported to the user
                // instead of crashing the demo.
                sw.Stop();
                Console.WriteLine(ex.Message + " in " + sw.ElapsedTicks + " ticks.");
            }

            Console.ReadKey();
        }

        /// <summary>
        /// Reads the leading bytes of a file and compares them with <see cref="PdfSignature"/>.
        /// </summary>
        /// <param name="path">Path of the file to inspect.</param>
        /// <returns>
        /// <c>true</c> when the file is at least as long as the signature and starts with it;
        /// otherwise <c>false</c>.
        /// </returns>
        private static bool HasPdfSignature(string path)
        {
            byte[] header = new byte[PdfSignature.Length];
            int total = 0;

            using (var stream = new FileStream(path, FileMode.Open, FileAccess.Read))
            {
                // Stream.Read may return fewer bytes than requested, so keep reading until the
                // header is complete or the end of the file is reached (Read returns 0).
                while (total < header.Length)
                {
                    int read = stream.Read(header, total, header.Length - total);
                    if (read == 0)
                    {
                        break;
                    }

                    total += read;
                }
            }

            // A file shorter than the signature cannot be a PDF.
            if (total < PdfSignature.Length)
            {
                return false;
            }

            for (int i = 0; i < PdfSignature.Length; i++)
            {
                if (header[i] != PdfSignature[i])
                {
                    return false;
                }
            }

            return true;
        }
    }
}

No comments:

Post a Comment