Here's some 'vibe coding' that produces fast C++ code to calculate the average size of Office documents (.docx, .xlsx, .pptx) and PDFs for your organization. It scans a drive letter; here's a command-line example.
c:\>calcavgsizedocs.exe c:\
This code features:
- Long path support (\\?\)
- Skip reparse points / junctions to avoid infinite loops
- Unicode-safe console output
- Multi-threaded scanning + progress + current folder
- Total docs scanned with thousands separators
- Ctrl-C gracefully exits
- VS2010-compatible, no C4996 warnings
5$ USD for a compiled pro version with help exe, email at
Pro Version
Usage: CalcAvgSizeDocs <drive_letter>
Example: CalcAvgSizeDocs C:
Options:
-h, --help Show this help message
-v, --version Show version info
-s, --silent Silence current progress output
This program is provided "AS-IS", without warranty of
merchantability or fitness for a particular purpose.
C++ Code basic version
// CalcAvgSizeDocs.cpp : Defines the entry point for the console application.
//
#include "stdafx.h" // required for a Visual Studio 2010 Win32 console app with precompiled headers; remove otherwise
#include <windows.h>
#include <string>
#include <iostream>
#include <vector>
#include <io.h>
#include <fcntl.h>
// ------------------------------------------------------------
// Global Data Structures
// ------------------------------------------------------------
// Running tally for one document type: how many files were seen and how many
// bytes they add up to. The fields are volatile because Accumulate updates
// them from several worker threads through Interlocked* calls.
struct Stats
{
    volatile unsigned __int64 count;       // number of matching files
    volatile unsigned __int64 totalBytes;  // combined size of those files, in bytes

    // Both tallies start at zero.
    Stats()
    {
        count = 0;
        totalBytes = 0;
    }
};
// One tally per document type; ProcessFile picks the bucket by file extension
// and Accumulate updates it.
Stats docxStats, xlsxStats, pptxStats, pdfStats;
// Work queue: directories still waiting to be enumerated. Workers take from
// and add to the back of the vector while holding queueLock; wmain seeds it
// with the root directory before any worker thread is started.
std::vector<std::wstring> dirQueue;
CRITICAL_SECTION queueLock;
// Progress & control
// totalDirsQueued is bumped for every directory pushed onto dirQueue and
// totalDirsProcessed for every directory a worker pops; ProgressProc reads
// both to print "done/total".
volatile LONG totalDirsQueued = 0;
volatile LONG totalDirsProcessed = 0;
// scanningFinished is set by wmain once the workers have returned (and by
// CtrlHandler); it ends the ProgressProc loop.
volatile bool scanningFinished = false;
// cancelRequested is set by CtrlHandler and polled by the workers and by
// ProgressProc so they can stop early.
volatile bool cancelRequested = false;
// Current folder display: the directory most recently picked up by a worker.
// Written by WorkerProc and read by ProgressProc, both under currentFolderLock.
std::wstring currentFolder;
CRITICAL_SECTION currentFolderLock;
// Total documents scanned across all four types; incremented in Accumulate.
volatile unsigned __int64 totalDocsScanned = 0;
// ------------------------------------------------------------
// Utility
// ------------------------------------------------------------
// Case-insensitive suffix test.
// Returns true when the last ext.length() characters of `s` compare equal to
// `ext` under _wcsicmp; a string shorter than the suffix never matches.
bool EndsWith(const std::wstring& s, const std::wstring& ext)
{
    const size_t textLen = s.length();
    const size_t suffixLen = ext.length();
    if (suffixLen > textLen)
        return false;

    // Compare only the tail of `s` that lines up with the suffix.
    const wchar_t* tail = s.c_str() + (textLen - suffixLen);
    return _wcsicmp(tail, ext.c_str()) == 0;
}
// Records one matching file in `stat` and in the global document counter.
//   stat - tally for the file's document type
//   size - size of the file in bytes
// Called concurrently from the worker threads, so every update goes through
// an Interlocked* primitive.
void Accumulate(Stats &stat, unsigned __int64 size)
{
    // count is 64 bits wide. The 32-bit InterlockedIncrement would only
    // operate on its low half and never carry into the upper half, so use
    // the 64-bit variant, matching the 64-bit adds below.
    InterlockedIncrement64(reinterpret_cast<volatile LONGLONG*>(&stat.count));
    InterlockedExchangeAdd64(reinterpret_cast<volatile LONGLONG*>(&stat.totalBytes),
                             static_cast<LONGLONG>(size));
    InterlockedIncrement64(reinterpret_cast<volatile LONGLONG*>(&totalDocsScanned));
}
void ProcessFile(const std::wstring& path)
{
WIN32_FILE_ATTRIBUTE_DATA fad;
if (!GetFileAttributesExW(path.c_str(), GetFileExInfoStandard, &fad))
return;
unsigned __int64 size =
(((unsigned __int64)fad.nFileSizeHigh) << 32) | fad.nFileSizeLow;
if (EndsWith(path, L".docx"))
Accumulate(docxStats, size);
else if (EndsWith(path, L".xlsx"))
Accumulate(xlsxStats, size);
else if (EndsWith(path, L".pptx"))
Accumulate(pptxStats, size);
else if (EndsWith(path, L".pdf"))
Accumulate(pdfStats, size);
}
// Formats a millisecond count as HH:MM:SS.mmm.
//   ms - duration in milliseconds
// Returns the formatted text. Hours are not wrapped, so durations of 100
// hours or more simply use more than two hour digits.
std::wstring FormatDuration(DWORD ms)
{
    const DWORD totalSeconds = ms / 1000;
    const DWORD totalMinutes = totalSeconds / 60;

    // DWORD is `unsigned long`, but the %u conversions below expect
    // `unsigned int`; convert explicitly so argument and format agree.
    const unsigned int hours = static_cast<unsigned int>(totalMinutes / 60);
    const unsigned int minutes = static_cast<unsigned int>(totalMinutes % 60);
    const unsigned int seconds = static_cast<unsigned int>(totalSeconds % 60);
    const unsigned int millis = static_cast<unsigned int>(ms % 1000);

    wchar_t buf[64];
    swprintf(buf, 64, L"%02u:%02u:%02u.%03u",
             hours, minutes, seconds, millis);
    return std::wstring(buf);
}
// Renders `value` in decimal with a comma between each group of three digits,
// counted from the right (1234567 -> "1,234,567").
std::wstring FormatWithThousandsSeparator(unsigned __int64 value)
{
    wchar_t digits[64];
    _ui64tow_s(value, digits, 64, 10);
    const std::wstring plain(digits);

    std::wstring grouped;
    const size_t digitCount = plain.length();
    for (size_t i = 0; i < digitCount; ++i)
    {
        // A comma goes in front of every digit (except the first) that has
        // a multiple of three digits from itself to the end of the number.
        if (i != 0 && (digitCount - i) % 3 == 0)
            grouped += L',';
        grouped += plain[i];
    }
    return grouped;
}
// ------------------------------------------------------------
// CTRL-C Handler
// ------------------------------------------------------------
// Console control handler registered by wmain.
// For Ctrl-C, Ctrl-Break and console-close events it raises cancelRequested
// and scanningFinished, prints a notice and returns TRUE to report the event
// as handled. Every other event type returns FALSE.
BOOL WINAPI CtrlHandler(DWORD ctrlType)
{
    switch (ctrlType)
    {
    case CTRL_C_EVENT:
    case CTRL_BREAK_EVENT:
    case CTRL_CLOSE_EVENT:
        cancelRequested = true;
        scanningFinished = true;
        std::wcout << L"\n\nCTRL-C detected... stopping scan...\n";
        return TRUE;

    default:
        return FALSE;
    }
}
// ------------------------------------------------------------
// Worker Thread
// ------------------------------------------------------------
DWORD WINAPI WorkerProc(LPVOID)
{
for (;;)
{
if (cancelRequested)
break;
std::wstring folder;
EnterCriticalSection(&queueLock);
if (!dirQueue.empty())
{
folder = dirQueue.back();
dirQueue.pop_back();
}
else
{
LeaveCriticalSection(&queueLock);
break;
}
LeaveCriticalSection(&queueLock);
InterlockedIncrement(&totalDirsProcessed);
EnterCriticalSection(¤tFolderLock);
currentFolder = folder;
LeaveCriticalSection(¤tFolderLock);
if (cancelRequested)
break;
// Prepend \\?\ for long path support
std::wstring longPath = L"\\\\?\\" + folder;
std::wstring searchPath = longPath + L"\\*";
WIN32_FIND_DATAW fd;
HANDLE hFind = FindFirstFileW(searchPath.c_str(), &fd);
if (hFind == INVALID_HANDLE_VALUE)
continue;
do
{
if (cancelRequested)
break;
const wchar_t* name = fd.cFileName;
if (wcscmp(name, L".") == 0 || wcscmp(name, L"..") == 0)
continue;
std::wstring fullPath = folder + L"\\" + name;
// Skip junctions / reparse points to avoid loops
if ((fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) &&
!(fd.dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT))
{
EnterCriticalSection(&queueLock);
dirQueue.push_back(fullPath);
LeaveCriticalSection(&queueLock);
InterlockedIncrement(&totalDirsQueued);
}
else if (!(fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY))
{
ProcessFile(fullPath);
}
} while (FindNextFileW(hFind, &fd));
FindClose(hFind);
}
return 0;
}
// ------------------------------------------------------------
// Progress Thread
// ------------------------------------------------------------
// Thread entry point for the progress display.
// Roughly twice a second it rewrites the current console line with the
// number of directories processed versus queued, the resulting percentage,
// and the directory most recently picked up by a worker. It stops once
// scanningFinished or cancelRequested is set. The thread parameter is
// unused; the return value is always 0.
DWORD WINAPI ProgressProc(LPVOID)
{
    while (!scanningFinished && !cancelRequested)
    {
        const LONG done = totalDirsProcessed;
        const LONG total = totalDirsQueued;
        const double pct = (total > 0) ? (done * 100.0 / total) : 0.0;

        // Copy the shared string under its lock and print from the copy, so
        // workers are not held up behind console output.
        std::wstring folderSnapshot;
        EnterCriticalSection(&currentFolderLock);
        folderSnapshot = currentFolder;
        LeaveCriticalSection(&currentFolderLock);

        std::wcout << L"\rProcessed: " << done
                   << L"/" << total
                   << L" (" << (int)pct << L"%) "
                   << L"Current: " << folderSnapshot
                   << std::flush;
        Sleep(500);
    }
    return 0;
}
// ------------------------------------------------------------
// Print Stats
// ------------------------------------------------------------
// Writes one result line for a document type to std::wcout.
//   label - text printed in front of the figures
//   s     - tally to report
// Prints the average file size in KB (bytes / 1024) and the file count, or
// "No files found" when the tally is empty.
void PrintStats(const wchar_t* label, const Stats& s)
{
    if (s.count != 0)
    {
        const double averageBytes = (double)s.totalBytes / (double)s.count;
        const double averageKb = averageBytes / 1024.0;
        std::wcout << label << L": " << averageKb
                   << L" KB average (" << s.count << L" files)\n";
    }
    else
    {
        std::wcout << label << L": No files found\n";
    }
}
// ------------------------------------------------------------
// MAIN
// ------------------------------------------------------------
int wmain(int argc, wchar_t* argv[])
{
// Enable UTF-8 output
SetConsoleOutputCP(CP_UTF8);
_setmode(_fileno(stdout), _O_U8TEXT);
SetConsoleCtrlHandler(CtrlHandler, TRUE);
DWORD startTime = GetTickCount();
// Determine drive
std::wstring root;
if (argc >= 2)
{
std::wstring arg = argv[1];
if (arg.length() == 1)
arg += L":\\";
else if (arg.length() == 2 && arg[1] == L':')
arg += L"\\";
else if (arg[arg.length() - 1] != L'\\')
arg += L"\\";
root = arg;
}
else
{
root = L"C:\\";
}
std::wcout << L"Scanning drive: " << root << L"\n";
InitializeCriticalSection(&queueLock);
InitializeCriticalSection(¤tFolderLock);
dirQueue.push_back(root);
InterlockedIncrement(&totalDirsQueued);
HANDLE hProgress = CreateThread(NULL, 0, ProgressProc, NULL, 0, NULL);
SYSTEM_INFO si;
GetSystemInfo(&si);
int numThreads = si.dwNumberOfProcessors;
std::vector<HANDLE> threads;
for (int i = 0; i < numThreads; ++i)
{
HANDLE h = CreateThread(NULL, 0, WorkerProc, NULL, 0, NULL);
threads.push_back(h);
}
WaitForMultipleObjects((DWORD)threads.size(), &threads[0], TRUE, INFINITE);
scanningFinished = true;
WaitForSingleObject(hProgress, INFINITE);
DeleteCriticalSection(&queueLock);
DeleteCriticalSection(¤tFolderLock);
DWORD endTime = GetTickCount();
DWORD elapsed = endTime - startTime;
std::wcout << L"\n\nResults (partial if cancelled):\n";
PrintStats(L"DOCX", docxStats);
PrintStats(L"XLSX", xlsxStats);
PrintStats(L"PPTX", pptxStats);
PrintStats(L"PDF ", pdfStats);
std::wcout << L"\nTotal documents scanned: "
<< FormatWithThousandsSeparator(totalDocsScanned)
<< L"\n";
std::wcout << L"\nTotal execution time: "
<< FormatDuration(elapsed) << L"\n";
if (cancelRequested)
std::wcout << L"\nScan cancelled by user.\n";
return 0;
}
D:\ Drive Results
C:\Users\Markus\Documents\Visual Studio 2010\Projects\CalcAvgSizeDocs\Release>calcavgsizedocs -s d
Scanning drive: d:\
Drive size: 1.09 TB
.DOCX: 125.60 KB average (230 files)
.XLSX: 321.00 KB average (47 files)
.PPTX: 11.63 MB average (11 files)
.PDF : 2.82 MB average (1625 files)
Total documents scanned: 1,913
Total execution time: 00:01:14.537
C:\ Drive Results (pro code output)
C:\Users\Markus\Documents\Visual Studio 2010\Projects\CalcAvgSizeDocs\Release>calcavgsizedocs -s c
Scanning drive: c:\
Drive size: 931.41 GB
Total Folders Processed: 221515
Complete Results:
.DOCX: 364.55 KB average (1782 files)
.XLSX: 109.95 KB average (565 files)
.PPTX: 2.05 MB average (205 files)
.PDF : 1.54 MB average (8179 files)
Total documents scanned: 10,731
Total execution time: 01:00:03.951
Vibe coded new solution 100x faster
C:\Users\Markus\Documents\Visual Studio 2010\Projects\avgsizedocs\Debug>avgsizedocs d:\
Scanning: d:\
Folders: 70013 | Files: 606999
----------------------------------------
Total Folders: 70,751
Total Files : 607,842
Elapsed Time : 0h 1m 40s
----------------------------------------
.DOCX: Count=137, Total=47.00 MB, Avg=351.33 KB
.XLSX: Count=92, Total=53.95 MB, Avg=600.50 KB
.PPTX: Count=24, Total=23.31 MB, Avg=994.40 KB
.PDF : Count=1476, Total=2.85 GB, Avg=1.97 MB
Please post your results in comments.