Person table based on census data

Introduction
This article will provide an example of how to create a person table based on US census data that is evenly distributed. Data was obtained from the following resources.

United States Census Bureau United States Social Security Administration

Example
The following steps were followed to create this table - beware it takes a very long time...

USER>Do ##class(Census.Person).Import USER>Do ##class(Census.Person).Process USER>Do ##class(Census.Person).Update

USER>Do ##class(Census.Person).%PurgeIndices USER>Do ##class(Census.Person).%BuildIndices

Code
The code to generate this data is as follows.

Class Utils.Logging [ Abstract ] {

ClassMethod Console(message As %String, prefix As %String = "", suffix As %String = "", terminator As %String = {$Char(13,10)}) As %Status {	If $ZParent Quit $$$ERROR($$$GeneralError, "Background process") If '$Data(message) Write # Quit $$$OK If '$Data(terminator) Set terminator=$Char(13,10) Write $Get(prefix), message, $Get(suffix), terminator Quit $$$OK }

}

Class Census.Person Extends (%Persistent, Utils.Logging, Utils.Phonetic) {

Property NameFirst As %String(MAXLEN = "");

Property NameLast As %String(MAXLEN = "");

Property NameFirstEnc As %String(COLLATION = "SQLUPPER(7)") [ SqlComputeCode = {Set {NameFirstEnc} = ..ToNYSIIS({NameFirst})}, SqlComputed, SqlComputeOnChange = NameFirst ];

Property NameLastEnc As %String(COLLATION = "SQLUPPER(7)") [ SqlComputeCode = {Set {NameLastEnc} = ..ToNYSIIS({NameLast})}, SqlComputed, SqlComputeOnChange = NameLast ];

Index NameFirstEncIdx On NameFirstEnc [ Type = bitmap ];

Index NameLastEncIdx On NameLastEnc [ Type = bitmap ];

ClassMethod Import As %Status {	/*		United States Census Bureau - http://www.census.gov/genealogy/www/data/2000surnames/names.zip

United States Social Security Administration - http://www.ssa.gov/oact/babynames/names.zip */	// display message on console Do ..Console Do ..Console("Importing names")

// do some initialisation Kill ^CacheTempNameImport Set rc=$SortBegin(^CacheTempNameImport) Do ..Console(rc, "$SortBegin=") Set counter=0 // read in last names Do ..Console("Loading last names") Set last=##class(%Stream.FileCharacter).%New Set last.Filename="C:\Temp\Names\app_c.csv" Do ..Console(last.Filename, "fn=") While 'last.AtEnd { Set line=last.ReadLine If $Piece(line, ",")="name" Continue Set name=$Piece(line, ",") Set rank=$Piece(line, ",", 2) Set count=+$Piece(line, ",", 3) Set ^CacheTempNameImport("last", rank)=$lb(name, count) }	// read in first names (by year of birth) Do ..Console("Loading first names") Set fs=##class(%ResultSet).%New("%File.FileSet") Set sc=fs.Execute("C:\Temp\Names\", "yob*.txt") While (fs.Next) { If (fs.Type'="F") || '(fs.Size) Continue Set sc=..ImportFirst(fs.Name) }	// now finished Do ..Console("", "$SortEnd=",, "") Set rc=$SortEnd(^CacheTempNameImport) Do ..Console($FNumber(rc, ",")) Quit $$$OK }

ClassMethod ImportFirst(fname) As %Status [ Private ] {	// read in first names Set first=##class(%Stream.FileCharacter).%New Set first.Filename=fname Do ..Console(first.Filename, "fn=") While 'first.AtEnd { Set line=first.ReadLine Set ckey=$Piece(line, ",", 1, 2) Set count=$Get(^CacheTempNameImport("first", ckey)) Set count=count+$Piece(line, ",", 3) Set ^CacheTempNameImport("first", ckey)=count }	Quit $$$OK }

ClassMethod Process As %Status {	// display message on console Do ..Console Do ..Console("Processing names")

// do some initialisation Do ..Console("Starting first names") Kill ^CacheTempNameFirst Set rc=$SortBegin(^CacheTempNameFirst) Do ..Console(rc, "$SortBegin=") Set counter=0, zhbegin=$ZHoroLog // work though first names Set ckey=$Order(^CacheTempNameImport("first", ""), 1, count) Do { For i=1:1:count { For { Set rand=$Random(1e16) If '$Data(^CacheTempNameFirst(rand)) Quit }			Set ^CacheTempNameFirst(rand)=ckey Set counter=$Increment(counter) If counter#1e4=0 Do ..Console(".",,, "") If counter#1e5=0 Do ..Console($Piece(ckey, ","),,, "") }		Set ckey=$Order(^CacheTempNameImport("first", ckey), 1, count) }	While ckey'="" // provide an update Do ..Console("") Do ..Console("", "$SortEnd=",, "") Set rc=$SortEnd(^CacheTempNameFirst) Do ..Console($FNumber(rc, ",")) Set taken=$ZHoroLog-zhbegin Do ..Console($FNumber(taken/60, "", 0), "Duration=", " minutes") // do some initialisation Do ..Console("Starting last names") Kill ^CacheTempNameLast Set rc=$SortBegin(^CacheTempNameLast) Do ..Console(rc, "$SortBegin=") Set counter=0, zhbegin=$ZHoroLog Set lastrank=$Order(^CacheTempNameImport("last", ""), -1)+1 // work though last names Set rank=$Order(^CacheTempNameImport("last", ""), 1, data) Do { Set name=$li(data), count=$li(data, 2) For i=1:1:count { For { Set rand=$Random(1e16) If '$Data(^CacheTempNameLast(rand)) Quit }			Set ^CacheTempNameLast(rand)=name Set counter=$Increment(counter) If counter#1e5=0 Do ..Console(".",,, "") If counter#1e6=0 Do ..Console(lastrank-rank,,, "") }		Set rank=$Order(^CacheTempNameImport("last", rank), 1, data) }	While rank'=""

// now finished Do ..Console("") Do ..Console("", "$SortEnd=",, "") Set rc=$SortEnd(^CacheTempNameLast) Do ..Console($FNumber(rc, ",")) Set taken=$ZHoroLog-zhbegin Do ..Console($FNumber(taken/60, "", 0), "Duration=", " minutes") Quit $$$OK }

ClassMethod Update As %Status {	// display message on console Do ..Console Do ..Console("Deleting names") Set sc=..%DeleteExtent Do ..Console("Updating names") Set counter=0, zhbegin=$ZHoroLog // work though both names (first and last) Set key1=$Order(^CacheTempNameFirst(""), 1, data) If key1="" Quit Set key2=$Order(^CacheTempNameLast(""), 1, last) If key2="" Quit Do { // create new person object Set first=$Piece(data, ",") &sql(			INSERT %NOINDEX INTO Census.Person			SET NameLast=:last, NameFirst=:first		) If SQLCODE { Do ..Console("Failure") Quit }		// provide update Set counter=$Increment(counter) If counter#1e4=0 Do ..Console(".",,, "") If counter#1e6=0 Do ..Console(counter,,, "") //If counter>1e6 Quit // get next records Set key1=$Order(^CacheTempNameFirst(key1), 1, data) If key1="" Set key1=$Order(^CacheTempNameFirst(""), 1, data) Set key2=$Order(^CacheTempNameLast(key2), 1, last) }	While key2'="" // provide an update Set taken=$ZHoroLog-zhbegin Do ..Console("") Do ..Console($FNumber(taken/60, "", 0), "Duration=", " minutes") // remove temporary globals and finish Do ..Console("Removing temporary globals") Quit $$$OK Kill ^CacheTempNameImport Kill ^CacheTempNameFirst Kill ^CacheTempNameLast Quit $$$OK }

}