Metadata Consulting [dot] ca

Friday, May 29, 2020

CSharp - Bits per Pixel or bit depth in a Human Readable Format for an image

Here's how to get human readable bit depth or color depth for an image in C#.

Note: This is measured in bits, not bytes, so the denominator is 1000, not 1024.

You can get the bit depth using the Windows API Code Pack, but be aware that JPEGs do not use a color map, so it consistently returns 24 / 3 = 8bpp.

http://metadataconsulting.blogspot.com/2020/05/How-to-iterate-over-image-music-and-video-properties-using-Microsoft-Windows-API-Code-Pack.html

Update - To remove the LINQ dependency, use this fast integer extraction instead:

https://metadataconsulting.blogspot.com/2020/06/CSharp-Get-a-number-or-integer-from-a-string-fast-a-speed-comparison.html

        //Bits Per Pixel Number of Colors Available Common Name(s)
        //1 2 Monochrome
        //2 4 CGA
        //4 16 EGA
        //8 256 VGA
        //16 65536 XGA, High Color
        //24 16777216 SVGA, True Color
        //32 16777216 + Transparency  
        //48 281 Trillion
        //https://victoriaprice135050.wordpress.com/2015/04/30/investigate-bit-depth-sampling-bits-per-pixel-bpp-monochrome-256-high-colour-true-colour-2/

        //https://en.wikipedia.org/wiki/Tera-

        /// <summary>
        /// Formats an image bit depth as a human readable string that also shows the number of
        /// representable colours (2^depth), scaled with decimal (1000-based) units.
        /// Example: "24" gives "24-bits(16.78Mbpp)".
        /// </summary>
        /// <param name="numint">
        /// Text containing the bit depth. Only the first run of decimal digits is used,
        /// so both "24" and "24 bit" are read as 24.
        /// </param>
        /// <returns>
        /// The depth followed by "-bits(", the colour count to at most 2 decimals, a unit
        /// (K, M, B, Tn, Quadn, Quintn or none) and "bpp)". Returns "1-bit" when the text is
        /// null, holds no parsable number, or the number is 0.
        /// </returns>
        public static string ToBitsPerPixelDecimalReadableFormat(string numint)
        {
            // Locate the first run of digits by hand; this avoids the LINQ dependency.
            int length = numint == null ? 0 : numint.Length;
            int start = 0;
            while (start < length && !char.IsDigit(numint[start]))
                start++;

            int end = start;
            while (end < length && char.IsDigit(numint[end]))
                end++;

            int pow = 0;
            if (end > start)
                int.TryParse(numint.Substring(start, end - start), out pow); // leaves pow at 0 on failure

            if (pow == 0) return "1-bit";

            // Number of colours = 2^depth. A double is used on purpose: an int shift (1 << pow)
            // wraps for depths of 31 and above, e.g. 1 << 32 evaluates to 1.
            // (Depths above 1023 exceed the range of a double and yield infinity.)
            double colours = System.Math.Pow(2, pow);

            // Decimal scales, largest first, so the first one that fits wins.
            double[] scales = { 1e18, 1e15, 1e12, 1e9, 1e6, 1e3 };
            string[] units = { "Quintn", "Quadn", "Tn", "B", "M", "K" };

            string unit = "";
            double d = colours;
            for (int i = 0; i < scales.Length; i++)
            {
                if (colours >= scales[i])
                {
                    unit = units[i];
                    d = colours / scales[i]; // true decimal division, no 1024-based shifting
                    break;
                }
            }

            return string.Concat(pow, "-bits(", d.ToString("0.##"), unit, "bpp)");
        }

Wednesday, May 27, 2020

CSharp - How to get either first row or last row of repeating values fast

Here's a design pattern to get either first or last row of a repeating field on multiple rows. In this example we'll extract the "File size" as a field to group on.

Internet media type                      : video/x-ms-wmv
File size                                : 372673
File size                                : 364 KiB
File size                                : 364 KiB
File size                                : 364 KiB
File size                                : 364 KiB
File size                                : 363.9 KiB
Duration                                 : 29166
Duration                                 : 29 s 166 ms
Duration                                 : 29 s 166 ms
Duration                                 : 29 s 166 ms
Duration                                 : 00:00:29.166
Duration                                 : 00:00:29:02
Duration                                 : 00:00:29.166 (00:00:29:02)
Overall bit rate                         : 102221
Overall bit rate                         : 102 kb/s
Maximum Overall bit rate                 : 103080
Maximum Overall bit rate                 : 103 kb/s

Result

Internet media type                      : video/x-ms-wmv
File size                                : 363.9 KiB
Duration                                 : 00:00:29.166 (00:00:29:02)
Overall bit rate                         : 102 kb/s
Maximum Overall bit rate                 : 103 kb/s

Here's a design pattern to get either 1st or last row of a repeating field, that is fast and does not rely on Linq library.

            // Sample input: one "name : value" pair per line; a field name may repeat on consecutive lines.
            string m = @"
Internet media type                      : video/x-ms-wmv
File size                                : 372673
File size                                : 364 KiB
File size                                : 364 KiB
File size                                : 364 KiB
File size                                : 364 KiB
File size                                : 363.9 KiB
Duration                                 : 29166
Duration                                 : 29 s 166 ms
Duration                                 : 29 s 166 ms
Duration                                 : 29 s 166 ms
Duration                                 : 00:00:29.166
Duration                                 : 00:00:29:02
Duration                                 : 00:00:29.166 (00:00:29:02)
Overall bit rate                         : 102221
Overall bit rate                         : 102 kb/s
Maximum Overall bit rate                 : 103080
Maximum Overall bit rate                 : 103 kb/s";

            string field = string.Empty;
            string prevfield = string.Empty;
            int idxsemi = 0;

            // Split on both line-ending styles: the breaks inside the literal come from the
            // source file, which need not match Environment.NewLine on the running machine.
            string[] linesIn = m.Split(new[] { "\r\n", "\n" }, StringSplitOptions.None);
            string[] linesOut = new string[linesIn.Length];

            // Walking backwards keeps the LAST row of each run; the output array is filled
            // from its end so the kept rows stay in their original order.
            int idxOut = linesIn.Length - 1;
            for (int i = linesIn.Length - 1; i >= 0; i--) //get last field in a repeating list
          //for (int i = 0; i < linesIn.Length; i++)     //get first field in a repeating list
          //(for the forward variant also start idxOut at 0, use idxOut++ and TrimEnd() the result)
            {
                idxsemi = linesIn[i].IndexOf(':');
                if (idxsemi > -1)
                    field = linesIn[i].Substring(0, idxsemi).TrimEnd(); //field to dedup, without the padding before ':'
                else
                    field = linesIn[i];

                if (prevfield == field)
                    continue; //same field as the neighbouring row already kept

                linesOut[idxOut--] = linesIn[i];

                prevfield = field;
            }

            // Unused leading slots are null; Join writes them as empty lines, which TrimStart removes.
            string final = string.Join(Environment.NewLine, linesOut).TrimStart(); //gets rid of head empty lines
        }
        
    }

Monday, May 25, 2020

How to iterate over image, music and video properties using Microsoft Windows API Code Pack

Here's how to iterate over SystemProperties.System.... properties to get metadata for a music, image or video file (there are many other file types as well) using the Microsoft Windows API Code Pack.

From MS SystemProperties.System documentation
https://docs.microsoft.com/en-us/uwp/api/windows.storage.systemproperties?view=winrt-18362 we get following metadata categories to interrogate.

SystemProperties.System.

TABLE 2
Audio	Gets an object that provides the indexing names of Windows file properties for System.Audio.
Author	Gets the name of the System.Author property (one of the Windows file properties.
Comment	Gets the name of the System.Comment property (one of the Windows file properties.
GPS	Gets an object that provides the indexing names of Windows system file properties for System.GPS.
Image	Gets an object that provides the indexing names of Windows file properties for System.Image.
ItemNameDisplay	Gets the name of the System.ItemNameDisplay property (one of the Windows file properties.
Keywords	Gets the name of the System.Keywords property (one of the Windows file properties.
Media	Gets an object that provides the indexing names of system media file properties such as System.Media.Duration.
Music	Gets an object that provides the indexing names of Windows file properties for System.Music.
Photo	Gets an object that provides the indexing names of Windows file properties for System.Photo.
Rating	Gets the name of the System.Rating property (one of the Windows file properties.
Title	Gets the name of the System.Title property (one of the Windows file properties.
Video	Gets an object that provides the indexing names of Windows file properties for System.Video.

There is no single collection that works for all of the above categories, so I had to implement the following. There is a default collection, and one for photos only.

Warning! This code does not sniff the underlying file format. So if you have an image that is really WebP but is mislabelled as a JPG (imageisreallywebp.jpg), or worse, a file named imageisreallywebp.webp that is really an EXE, this library will NOT detect that!

This code base is not maintained, and the BIT Depth for example is not reported accurately, test using samples here - https://etc.usf.edu/techease/win/images/what-is-bit-depth/

Code to iterate over Media Properties

// Print a header, then every System.Media.* property of `picture` that resolves to a non-null value.
Console.WriteLine("");
Console.WriteLine("SystemProperties.System.Media - Media Category Properties");
Console.WriteLine("");

string mediaProp = string.Empty;
PropertyInfo[] mediaPI = typeof(SystemProperties.System.Media).GetProperties();

for (int idx = 0; idx < mediaPI.Length; idx++)
{
    // The canonical name is "System.Media.<Name>", NOT "SystemProperties.System.Media.<Name>".
    // See e.g. https://docs.microsoft.com/en-us/windows/win32/properties/props-system-photo-aperture
    mediaProp = string.Concat("System.Media.", mediaPI[idx].Name);

    try
    {
        IShellProperty shellProperty = picture.Properties.GetProperty(mediaProp); //very slow

        if (shellProperty == null || shellProperty.ValueAsObject == null)
        {
            continue; // nothing to print for this property
        }

        Console.WriteLine(string.Concat(shellProperty.Name, "=", shellProperty.ValueAsObject.ToString()));
    }
    catch
    {
        // Any exception raised for this property is ignored; move on to the next one.
    }
}

Saturday, May 23, 2020

The given CanonicalName is not valid ShellObject.Properties.GetProperty Microsoft Windows API Code Pack

If you are getting "The given CanonicalName is not valid" while using the Microsoft Windows API Code Pack to get metadata about an image, look no further. I have been racking my brains over how to get this to work.

You have probably tried the following attempts, which yield the error. In this example, it's just trying to extract MIMEType, a field that is always populated and was chosen for testing purposes.

This first attempt to get MimeType1 method uses the proper string signature to extract the MIMEType property, but you get the "The given CanonicalName is not valid" error. The second attempt also fails.

// Both string lookups below fail with "The given CanonicalName is not valid".
var MimeType1 = picture.Properties.GetProperty("SystemProperties.System.MIMEType").ToString(); //looks properly specified, but the "SystemProperties." prefix is not part of the canonical name
var MimeType2 = picture.Properties.GetProperty("MIMEType"); //random attempt - also fails; the working name is "System.MIMEType"

So what is going on here?

This method, which uses the PropertyKey type, works, and you would think you could just get the string name from it.

var MimeType1 = picture.Properties.GetProperty(SystemProperties.System.MIMEType).ToString(); //PropertyKey argument (not a string name) - this call works

But SystemProperties.System.MIMEType has no .CanonicalName property to use, or any named property and toString() will not work.

But it turns out that the documentation for ShellObject.Properties.GetProperty() is very hard to find! I can get valid types here.

So, you have to look at the source code of the Microsoft Windows API Code Pack.

And from there, you get that GetProperty is really a native win32 call to propsys.dll

        //PS refers to Property Store - http://www.pinvoke.net/default.aspx/Interfaces/IPropertyStore.html
        /// <summary>
        /// P/Invoke declaration for the native <c>PSGetPropertyKeyFromName</c> export of propsys.dll,
        /// which looks up the property key that belongs to a canonical property name.
        /// </summary>
        /// <param name="pszCanonicalName">Canonical property name, marshalled as a wide (UTF-16) string.</param>
        /// <param name="propkey">Receives the property key on return.</param>
        /// <returns>An integer status code (presumably an HRESULT - confirm against the Win32 documentation).</returns>
        [DllImport("propsys.dll", CharSet = CharSet.Unicode, SetLastError = true)]
        internal static extern int PSGetPropertyKeyFromName(
            [In, MarshalAs(UnmanagedType.LPWStr)] string pszCanonicalName,
            out PropertyKey propkey
        );

And at least there is some documentation for this call - https://docs.microsoft.com/en-us/windows/win32/api/propsys/nf-propsys-psgetnamefrompropertykey

From here it gives you a very vague idea of what to do.

Solution:

It turns out that you cannot use what you would naturally think;

"SystemProperties.System.MIMEType" must be "System.MIMEType"

"SystemProperties.System.Photo.CameraModel" must be "System.Photo.CameraModel"

The metadata image, video, music properties are all listed here to extract

https://docs.microsoft.com/en-us/windows/win32/properties/props

You may ask why use the string name, this is the only way to iterate over the collection, see my post here.

Wednesday, May 20, 2020

CSharp - Human readable video bit rate (bits per second) format

Format video bit rate measured in bits per second into "human readable" format, meaning that number is formatted as specified by metric SI convention using E,P,T,G,M,k prefix units.

"12344".ToBPSReadableFormat() outputs 12.3kbps

Quoting https://en.wikipedia.org/wiki/Metric_prefix for SI units;

exa	E	10¹⁸	1000000000000000000	quintillion
peta	P	10¹⁵	1000000000000000	quadrillion
tera	T	10¹²	1000000000000	trillion
giga	G	10⁹	1000000000	billion
mega	M	10⁶	1000000	million
kilo	k	10³	1000	thousand

This string extension method will format to bps to 1 decimal place.

Update June 1, 2020: You can combine this with fast integer extraction
https://metadataconsulting.blogspot.com/2020/06/CSharp-Get-a-number-or-integer-from-a-string-fast-a-speed-comparison.html

        /// <summary>
        /// Formats a bit rate given in bits per second with metric SI prefixes (k, M, G, T, P, E),
        /// rounded to at most 1 decimal place - https://en.wikipedia.org/wiki/Metric_prefix
        /// </summary>
        /// <param name="numlong">The bit rate as integer text, e.g. "12344".</param>
        /// <returns>
        /// The scaled value followed by its unit, e.g. "12.3kbps". Returns "0bps" when the text
        /// is null, is not a valid integer, or is zero.
        /// </returns>
        public static string ToBPSReadableFormat(this string numlong)
        {
            long l;
            if (!long.TryParse(numlong, out l) || l == 0) return "0bps";

            // Compare on a double magnitude: negating long.MinValue would overflow.
            double magnitude = System.Math.Abs((double)l);

            // SI prefixes are powers of 1000 (not 1024), largest first so the first fit wins.
            double[] scales = { 1e18, 1e15, 1e12, 1e9, 1e6, 1e3 };
            string[] units = { "Ebps", "Pbps", "Tbps", "Gbps", "Mbps", "kbps" };

            for (int i = 0; i < scales.Length; i++)
            {
                if (magnitude >= scales[i])
                {
                    double scaled = l / scales[i]; // keeps the sign of the input
                    return string.Concat(scaled.ToString("0.#"), units[i]);
                }
            }

            // Below 1000: plain bits per second.
            return string.Concat(((double)l).ToString("0.#"), "bps");
        }

C# Get the number of pages in a PDF document from metadata without load entire file

A popular solution is to use pdfinfo and pump it through a command line to get the number of pages in a PDF. However, if you examine the source code, https://dl.xpdfreader.com/xpdf-4.02.tar.gz you'll see that this reads the entire PDF to determine the page count. Therefore, the load time will be indeterminate, esp. slow for large files.

Here's a way to read the page count of a PDF using the metadata in a fixed amount of time.
BUT BE ADVISED, this only works for some PDF versions and encodings. It does not guarantee a returned result. For that you need to read the entire file and iterate over this regex - "/\/Count\s+(\d+)/".

However, this works in a preset amount of time 10ms, since it reads only the first 32,767 bytes of the file. It then matches some possible patterns for the page count. Most common are matched first.

Please add more in the comments section. You can find patterns by inspecting the PDF file.

You can inspect PDF files using Frhed, a free hex/binary editor for Windows that will open a PDF file so you can view the metadata for yourself. Or test drive my Clipboard Plaintext Power Tool, which has a custom version of Frhed built in.

// Candidate page-count patterns. Each one captures the count as group 1 and requires at least one digit.

const string strRegexNT = @"\/N\s*(\d+)\s*\/[TO]\s"; //Seems to be most common found and reliable
private static readonly Regex rgxNT = new Regex(strRegexNT, RegexOptions.Multiline | RegexOptions.CultureInvariant | RegexOptions.Compiled);

const string strRegexPRT = @"\/Pages\s*(\d+).*R.*\/T";
private static readonly Regex rgxPRT = new Regex(strRegexPRT, RegexOptions.Multiline | RegexOptions.CultureInvariant | RegexOptions.Compiled);

const string strRegexTPC = @"\<\<\/Type\/Pages\/Count\s*(\d+)\s*\/Kids";
private static readonly Regex rgxTPC = new Regex(strRegexTPC, RegexOptions.Multiline | RegexOptions.CultureInvariant | RegexOptions.Compiled);

const string strRegexEndObj = @"endobj\s*7\s*0\s*obj\s*\<\<\s*\/Count\s*(\d+)\s*\/";
private static readonly Regex rgxEndObj = new Regex(strRegexEndObj, RegexOptions.Multiline | RegexOptions.CultureInvariant | RegexOptions.Compiled);

// The patterns in the order they are tried.
private static readonly Regex[] rgxPageCountCandidates = { rgxNT, rgxPRT, rgxTPC, rgxEndObj };

// Only this many leading bytes of the file are read.
private const int HeadBytesToScan = Int16.MaxValue; //32767

/// <summary>
/// Gets number of PDF pages reading only 1st 32767 bytes in 10ms, should cover most cases
/// https://metadataconsulting.blogspot.com/2020/05/C-Get-the-number-of-pages-in-a-PDF-document-from-metadata-without-load-entire-file.html
/// </summary>
/// <param name="fileName">Path of the PDF file to inspect.</param>
/// <returns>
/// The digits captured by the first pattern that matches, or an empty string when the
/// file cannot be read or none of the patterns match.
/// </returns>
public static string GetNumofPdfPages(string fileName)
{
    if (string.IsNullOrEmpty(fileName))
        return string.Empty;

    string head;
    try //for Openread
    {
        using (BinaryReader br = new BinaryReader(File.OpenRead(fileName)))
        {
            head = Encoding.UTF8.GetString(br.ReadBytes(HeadBytesToScan));
        }
    }
    catch (Exception)
    {
        // Best effort by design: an unreadable file is reported as "no page count found".
        return string.Empty;
    }

    foreach (Regex candidate in rgxPageCountCandidates)
    {
        Match m = candidate.Match(head);
        if (m.Success) // Groups.Count is fixed by the pattern and says nothing about success
            return m.Groups[1].Value;
    }

    return string.Empty;
}

Saturday, May 16, 2020

Getting the Clipboard File DropEffect in CSharp (C#)

How to access the file DropEffect in c# in order to tell cut from paste for a file DropList.

// Read the file drop list and its "Preferred DropEffect" (cut vs. copy), then put both back on the clipboard.
string[] fileList = iData.GetData(DataFormats.FileDrop) as string[]; //null when there is no file drop list
Object objDropEffect = Clipboard.GetData("Preferred DropEffect"); //get drop effect that was put on the clipboard; null when absent
DragDropEffects dropEffect = DragDropEffects.Copy; //set default
int dropme = 0; //https://docs.microsoft.com/en-us/dotnet/api/system.windows.forms.dragdropeffects?view=netcore-3.1

// `as` copes with a missing (null) or unexpected value, so no GetType() call on null.
MemoryStream msDropEffect = objDropEffect as MemoryStream;
if (msDropEffect != null)
{
    using (msDropEffect) //a using statement works fine on an already-cast stream
    {
        try
        {
            // The effect is stored as a 4-byte value; read all of it, not just the first byte.
            byte[] raw = new byte[4];
            if (msDropEffect.Read(raw, 0, raw.Length) == raw.Length)
            {
                dropme = BitConverter.ToInt32(raw, 0);
                dropEffect = (DragDropEffects)dropme; //cast into DragDropEffects enum CUT (Move) & PASTE (copy)
            }
        }
        catch (ObjectDisposedException)
        {
            // Stream already closed: keep the default effect.
        }
    }
}

StringCollection strcolFileList = new StringCollection();
if (fileList != null)
    strcolFileList.AddRange(fileList);

try
{
    dataObj.SetFileDropList(strcolFileList);

    // Write the effect back in the same shape it is read above: a stream holding the 4 bytes
    // of the value. Do not dispose this stream here; the data object still refers to it.
    MemoryStream msPreferred = new MemoryStream(BitConverter.GetBytes((int)dropEffect));
    dataObj.SetData("Preferred DropEffect", msPreferred);
}
catch
{
    // Best effort: e.g. an empty file list is rejected; the clipboard is then set without it.
}

//https://csharp.hotexamples.com/site/file?hash=0xe453aa34f981d998492adf71d35b84904e1c92052c44966d047a6e8da3c6a81e&fullName=Other/Altaxo/AltaxoSource-0.54.0.554/AltaxoSource/Libraries/ICSharpCode.TextEditor/Project/Src/TextAreaClipboardHandler.cs&project=xuchuansheng/GenXSource
// Work around ExternalException bug. (SD2-426)
// Best reproducible inside Virtual PC.
try
{
    Clipboard.SetDataObject(dataObj);
}
catch (ExternalException)
{
    // Let pending messages run, then retry once.
    Application.DoEvents();
    try
    {
        Clipboard.SetDataObject(dataObj);
    }
    catch (ExternalException)
    {
        string error = "Drag'n Drop failed to be set.";
    }
}
catch
{
    string error = "Drag'n Drop failed to be set.";
}

Friday, May 15, 2020

CSharp How to get Clipboard incoming DragDropEffects for FileDrop and set it back

This is a C# working implementation of getting and setting DropEffect for a FileDrop to and from the Clipboard.

Here's a snippet from the DragDropEffect documentation
https://docs.microsoft.com/en-us/dotnet/api/system.windows.forms.dragdropeffects

All	-2147483645	The combination of the Copy, Move, and Scroll effects.
Copy	1	The data from the drag source is copied to the drop target.
Link	4	The data from the drag source is linked to the drop target.
Move	2	The data from the drag source is moved to the drop target.
None	0	The drop target does not accept the data.
Scroll	-2147483648	The target can be scrolled while dragging to locate a drop position that is not currently visible in the target.

// Read the file drop list and its "Preferred DropEffect" from the clipboard, then set both back.
string[] fileList = iData.GetData(DataFormats.FileDrop) as string[]; //null when there is no file drop list
Object objDropEffect = Clipboard.GetData("Preferred DropEffect"); //get drop effect that was put on the clipboard; null when absent
DragDropEffects dropEffect = DragDropEffects.Copy; //set default
int dropme = 0; //https://docs.microsoft.com/en-us/dotnet/api/system.windows.forms.dragdropeffects?view=netcore-3.1

// `as` copes with a missing (null) or unexpected value, so no GetType() call on null.
MemoryStream msDropEffect = objDropEffect as MemoryStream;
if (msDropEffect != null)
{
    using (msDropEffect) //a using statement works fine on an already-cast stream
    {
        try
        {
            // The effect is stored as a 4-byte value; read all of it, not just the first byte.
            byte[] raw = new byte[4];
            if (msDropEffect.Read(raw, 0, raw.Length) == raw.Length)
            {
                dropme = BitConverter.ToInt32(raw, 0);
                dropEffect = (DragDropEffects)dropme; //cast into enum
            }
        }
        catch (ObjectDisposedException)
        {
            // Stream already closed: keep the default effect.
        }
    }
}

StringCollection strcolFileList = new StringCollection();
if (fileList != null)
    strcolFileList.AddRange(fileList);

try
{
    dataObj.SetFileDropList(strcolFileList);

    // Write the effect back in the same shape it is read above: a stream holding the 4 bytes
    // of the value. Do not dispose this stream here; the data object still refers to it.
    MemoryStream msPreferred = new MemoryStream(BitConverter.GetBytes((int)dropEffect));
    dataObj.SetData("Preferred DropEffect", msPreferred); //on Win7, this might have to be null to perform a copy
}
catch
{
    // Best effort: e.g. an empty file list is rejected; the clipboard is then set without it.
}

//https://csharp.hotexamples.com/site/file?hash=0xe453aa34f981d998492adf71d35b84904e1c92052c44966d047a6e8da3c6a81e&fullName=Other/Altaxo/AltaxoSource-0.54.0.554/AltaxoSource/Libraries/ICSharpCode.TextEditor/Project/Src/TextAreaClipboardHandler.cs&project=xuchuansheng/GenXSource
// Work around ExternalException bug. (SD2-426)
// Best reproducible inside Virtual PC.
try
{
    Clipboard.SetDataObject(dataObj);
}
catch (ExternalException)
{
    // Let pending messages run, then retry once.
    Application.DoEvents();
    try
    {
        Clipboard.SetDataObject(dataObj);
    }
    catch (ExternalException)
    {
        string error = "Drag'n Drop failed to be set.";
    }
}
catch
{
    string error = "Drag'n Drop failed to be set.";
}

My Cool Apps

Path Too Long Auto Fixer find, report, fix syncing issues to the cloud, desktops, servers, HDs, SSDs, USBs, SANs

Clipboard Plaintext Power Tool turbo charge your clipboard with transform functions, best in the world

Solo SCRUM Sprinter App enforce task tracking every 30mins, Outlook calendar booking automation

Registry Viewer read-only registry viewer

eyeBreak 20-20-20 Rule App enforce eye break every 20mins

RegToText convert registry values to be human readable

Metadata Consulting [dot] ca - Blog

Pages