JavaScript to Extract SSNs

JavaScript to Extract SSNs

December 31, 2019

Tonight, I tested out a JavaScript script posted here by EverMap that extracts Social Security numbers from PDFs.  My test set has SSNs listed on the first page of a short document; in a table in another short document; and hidden in a very long document.  The script found each SSN. 

 

Enter the below script in Adobe Acrobat by going to Tools . . . Action Wizard and clicking 'Create New Action . . . '.   Under 'More Tools', in the 'Choose tools to add' section, click on 'Execute Javascript', then uncheck 'Prompt User' and click on 'Specify Settings' on the right. 

 

 Enter the script in the JavaScript Editor . . . 

 

 

 

Click OK and save and rename the new action. 

 

Add the files you want to process and then click Start.   A new PDF will be generated for each source PDF, listing the SSNs found in it.

 

 

 

 

 

/* Extract US Social Security Numbers From the Document */
// This script scans all pages of the input document and extracts
// US Social Security numbers written as AAA-GG-SSSS or AAA GG SSSS
// (the same separator must appear in both positions).
// Output PDF document will be placed in the same folder
// as input. The name of the output document will be:
// Original filename + "_Extracted_SSNs"
// Visit www.evermap.com for more useful JavaScript samples.

// Area number (first three digits): 000, 666 and 900-999 are never issued.
// Areas 773-899 are accepted because they have been assignable since the
// SSA switched to randomized assignment on June 25, 2011.
// Group number 00 and serial number 0000 are rejected as never issued.
// The single capture group holds the separator so that \1 forces the
// second separator to be the same character as the first.
var reMatch=/(?!000|666)[0-8]\d{2}([ -])(?!00)\d\d\1(?!0000)\d{4}/g;

var strExt = "_Extracted_SSNs.pdf";
var strIntro = "Social Security Numbers extracted from document: ";
var strFinal = "Total number of SSNs extracted: " ;

ExtractFromDocument(reMatch,strExt,strIntro,strFinal);

/**
 * Scan every page of the current document for text matching reMatch and
 * save the unique matches, one per line, to a new PDF report.
 *
 * `this` must provide path, numPages, getPageNumWords() and
 * getPageNthWord() (the document the script is running against).
 * Any error raised while building or saving the report is shown to the
 * user through app.alert() instead of being thrown.
 *
 * @param {RegExp} reMatch     pattern applied to the text of each page
 * @param {string} strFileExt  suffix that replaces the source file's
 *                             ".pdf" extension to form the report path
 * @param {string} strMessage1 heading text; the source path is appended
 * @param {string} strMessage2 footer text; the match count is appended
 */
function ExtractFromDocument(reMatch, strFileExt, strMessage1, strMessage2)
{
// Construct the filename for the output document. The extension test is
// case-insensitive and a path without a ".pdf" suffix gets strFileExt
// appended, so "FILE.PDF" no longer yields the source path unchanged.
var rePdfExt = /\.pdf$/i;
var filename = rePdfExt.test(this.path)
    ? this.path.substring(0, this.path.length - 4) + strFileExt
    : this.path + strFileExt;

// create a report document
try {
    var ReportDoc = new Report();
    var Out = {}; // matches are stored as property names to drop duplicates
    var i, j;

    ReportDoc.writeText(strMessage1 + this.path);
    ReportDoc.divide(1);      // draw a horizontal divider
    ReportDoc.writeText(" "); // write a blank line to output

    for (i = 0; i < this.numPages; i++)
    {
        // rebuild the page text from its words; the third argument is
        // false so that the words are returned unstripped
        var numWords = this.getPageNumWords(i);
        var PageText = "";
        for (j = 0; j < numWords; j++) {
            PageText += this.getPageNthWord(i, j, false);
        }

        var strMatches = PageText.match(reMatch);
        if (strMatches === null) continue; // nothing found on this page

        // remember each match once, in first-seen order
        for (j = 0; j < strMatches.length; j++) {
            Out[strMatches[j]] = true;
        }
    }

    // write the unique matches to the report and count them
    var nTotal = 0;
    for (var prop in Out)
    {
        // skip anything inherited through the prototype chain
        if (!Object.prototype.hasOwnProperty.call(Out, prop)) continue;
        ReportDoc.writeText(prop);
        nTotal++;
    }

    ReportDoc.writeText(" "); // output extra blank line
    ReportDoc.divide(1);      // draw a horizontal divider
    ReportDoc.writeText(strMessage2 + nTotal);

    // save report to a document
    ReportDoc.save(
        {
        cDIPath: filename
        });

}
catch(e)
{
app.alert("Processing error: "+e);
}

} // end of the function
 

Please reload

Some elements on this page did not load. Refresh your site & try again.

Contact Me With Your Litigation Support Questions:

seankevinoshea@hotmail.com

  • Twitter Long Shadow

© 2015 by Sean O'Shea . Proudly created with Wix.com