Billy Buchanan, Ph.D. Director, Office of Data, Research, and Accountability Fayette County Public Schools
<dependency>
<groupId>org.paces-consulting</groupId>
<artifactId>stata-java</artifactId>
<version>0.0.3-SNAPSHOT</version>
<type>jar</type>
<scope>provided</scope>
<optional>false</optional>
</dependency>
// Create a Meta Data object to access variables, observations, and other metadata
Meta metaDataObject = new Meta();
// Create a DataSet object to represent all of the data available in memory
DataSet myData = new DataSet(metaDataObject);
// Access Variable Names in an iterable object
List<String> variableNames = metaDataObject.getVarNames();
// Can access starting, ending, and number of observation indices as well
Number startingObservation = metaDataObject.getStataobs().getSobs().longValue();
Number endingObservation = metaDataObject.getStataobs().getEobs().longValue();
Number totalObservations = metaDataObject.getStataobs().getNobs().longValue();
// Can also access version specific observation indices
List<Integer> version13Obs = metaDataObject.getObs13();
List<Long> version14Obs = metaDataObject.getObs14();
// Value label names are also associated with variable names
Map<String, String> valueLabelNames = metaDataObject.getValueLabelNames();
// Variable labels are associated with variable names
Map<String, String> variableLabels = metaDataObject.getVariableLabels();
// Class StataMissings provides methods to test for extended missing values and to map
// extended missing values correctly; Methods are overloaded to handle different types
// being passed to them
StataMissings.isExtendedMissing(value);
// Also includes Classes/Methods to provide convenience wrappers to generate iterable
// items from arguments and/or macro values
List<String> arguments = StringArgsToCollector.argsToCollector(String[] args);
// MDArrays class provides methods to cast multidimensional arrays of Objects to primitives
// (each call below spells out the argument type to show which overload is being illustrated)
byte[][] recastedData = MDArrays.toPrimative(Byte[][] thedata);
double[][] recastedData = MDArrays.toPrimative(Double[][] thedata);
// Primitives to Objects
Byte[][] recastedData = MDArrays.toObject(byte[][] thedata);
Double[][] recastedData = MDArrays.toObject(double[][] thedata);
// Arrays of primitives to nested lists
List<List<Byte>> recastedData = MDArrays.toList(byte[][] thedata);
// Arrays of objects to nested lists
List<List<Double>> recastedData = MDArrays.toList(Double[][] thedata);
/* Initial set up only required once */
// Locate the Stata Java API library
qui: findfile libstata-plugin.jar
// Assuming maven is already on your path
! mvn install:install-file -Dfile="`r(fn)'" -DgroupId=com.stata -DartifactId=sfi ///
-Dversion=`= int(`c(stata_version)')' -Dpackaging=jar
// Install the archetype command
net inst archetype, from(http://wbuchanan.github.io/stataArchetype)
// Then you can template the .ado, .sthlp, and Java directory
// structure with a "single" line of code
archetype , gr(org.paces-consulting) art(myStataLibrary) ver(0.0.1-SNAPSHOT) ///
mainc(ArchtypeClass) javacallm(callingMethod) buildd(~/conferenceExample) ///
pack(org.paces.stata.myArchetypeLibrary)
// Install development version
net inst raschjmle, from(http://wbuchanan.github.io/StatajMetrik)
// Load example data set used in IRT manuals
webuse masc1.dta, clear
// Fit the model, print the results to the screen and add person parameters to
// the current data set in memory
. raschjmle q1-q9
=================================================================================================
Item Difficulty Std. Error WMS Std. WMS UMS Std. UMS
-------------------------------------------------------------------------------------------------
q1 -0.40 0.08 0.85 -4.32 0.84 -2.86
q2 0.11 0.08 1.03 1.04 1.05 1.04
q3 -1.36 0.10 0.93 -1.39 0.86 -1.39
q4 0.49 0.08 0.99 -0.25 1.02 0.38
q5 1.66 0.09 0.93 -1.54 1.02 0.28
q6 0.82 0.08 0.93 -2.05 0.95 -0.82
q7 1.37 0.09 1.10 2.42 1.17 1.99
q8 -1.87 0.11 0.77 -3.81 0.85 -1.14
q9 -0.81 0.09 1.04 1.04 1.13 1.66
=================================================================================================
SCALE QUALITY STATISTICS
==================================================
Statistic Items Persons
--------------------------------------------------
Observed Variance 1.3031 1.4411
Observed Std. Dev. 1.1415 1.2005
Mean Square Error 0.0080 0.7097
Root MSE 0.0894 0.8425
Adjusted Variance 1.2951 0.7314
Adjusted Std. Dev. 1.1380 0.8552
Separation Index 12.7235 1.0151
Number of Strata 17.2980 1.6868
Reliability 0.9939 0.5075
==================================================
SCORE TABLE
==================================
Score Theta Std. Err
----------------------------------
0.00 -3.94 1.89
1.00 -2.55 1.12
2.00 -1.59 0.89
3.00 -0.89 0.80
4.00 -0.28 0.77
5.00 0.31 0.76
6.00 0.91 0.79
7.00 1.59 0.87
8.00 2.53 1.11
9.00 3.89 1.89
==================================
// Load example data set
sysuse auto.dta, clear
// Phonetically encode the make of the cars
phoneticenc make, caverphone1(cav1) caverphone2(cav2) col(kolner) dms(daitch) dblm(dblmeta) ///
metap(metaphone) nys(nysiis) beiderm(bmencode) matchrating(mrating)
// List the first 5 results for several of the encodings
li make cav1 cav2 kolner daitch in 1/5
+---------------------------------------------------------------------------------+
| make cav1 cav2 kolner daitch |
|---------------------------------------------------------------------------------|
1. | AMC Concord AMKNKT AMKNKTNNNN 06846472656565656565656565656565 064649 |
2. | AMC Pacer AMKPSN AMKPSNNNNN 068187656565656565656565656565 064749 |
3. | AMC Spirit AMKSPR AMKSPRTNNN 0688172656565656565656565656565 064793 |
4. | Buick Century PKSNTR PKSNTRNNNN 148627656565656565656565656565 754639 |
5. | Buick Electra PKLKTR PKLKTRNNNN 145827656565656565656565656565 758439 |
+---------------------------------------------------------------------------------+
// This shows a few other encodings
li make dblmeta metaphone nysiis mrating in 1/5
+-------------------------------------------------------+
| make dblmeta metaph~e nysiis mrating |
|-------------------------------------------------------|
1. | AMC Concord AMKN AMKK ANCANC AMCLNL |
2. | AMC Pacer AMKP AMKP ANCPAC AMCLNL |
3. | AMC Spirit AMKS AMKS ANCSPA AMCLNL |
4. | Buick Century PKSN BKSN BACANT BCKLNL |
5. | Buick Electra PKLK BKLK BACALA BCKLNL |
+-------------------------------------------------------+
// Display the file system properties of the auto.dta file
filesys `c(sysdir_base)'a/auto.dta, attr dis
----------------------------------------------------------------------------------------------------
Attribute File Attribute Value
----------------------------------------------------------------------------------------------------
Created Date 20nov2015 05:44:54
Modified Date 20nov2015 05:44:54
Last Accessed Date 11dec2015 05:52:23
Absolute File Path /Applications/Stata/ado/base/a/auto.dta
Canonical File Path /Applications/Stata/ado/base/a/auto.dta
Parent Path /Applications/Stata/ado/base/a
File Name auto.dta
Is Symbolic Link false
Is Regular File true
Is Executable false
Is Hidden false
Is Readable true
Is Writable true
----------------------------------------------------------------------------------------------------
// The properties are returnable so they can be used elsewhere
ret li
macros:
r(iswritable) : "true"
r(isreadable) : "true"
r(parentpath) : "/Applications/Stata/ado/base/a"
r(ishidden) : "false"
r(filename) : "auto.dta"
r(isexecutable) : "false"
r(canonicalpath) : "/Applications/Stata/ado/base/a/auto.dta"
r(absoluepath) : "/Applications/Stata/ado/base/a/auto.dta"
r(filesize) : "6443"
r(regularfile) : "true"
r(symlink) : "false"
r(accessednum) : "1765432343000"
r(modifiednum) : "1763617494000"
r(creatednum) : "1763617494000"
r(accessed) : "11dec2015 05:52:23"
r(modified) : "20nov2015 05:44:54"
r(created) : "20nov2015 05:44:54"
// Run some Stata code (the markdown example for Stata 15)
dyndoc example/dyndoc_ex.txt, replace
// Now you can send the output as an email along with any other files of interest
// Can send to multiple recipients by comma delimiting addresses and attach multiple
// files in the same way
email, prop(emailtest.props) fr(billy@k12data.ninja) to(billy.buchanan@fayette.kyschools.us) ///
subject(another email test with Stata Email) html body(Test body) ///
a(pom.xml, dyndoc_ex.html)
// There are a lot of potential configuration options. If you host your own
// email server it should generally be easier to configure, otherwise reach
// out to your friendly - or sometimes not so friendly - IT professionals
// to provide the information you'd need to integrate with your email service
mail.smtp.user=
mail.smtp.host=
mail.smtp.port=
mail.smtp.connectiontimeout=
mail.smtp.timeout=
mail.smtp.from=
mail.smtp.localhost=
mail.smtp.localaddress=
mail.smtp.localport=
mail.smtp.ehlo=
mail.smtp.auth=
mail.smtp.auth.mechanisms=
mail.smtp.auth.login.disable=
mail.smtp.auth.plain.disable=
mail.smtp.auth.digest-md5.disable=
mail.smtp.auth.ntlm.disable=
mail.smtp.auth.ntlm.domain=
mail.smtp.auth.ntlm.flags=
mail.smtp.submitter=
mail.smtp.dsn.notify=
mail.smtp.dsn.ret=
mail.smtp.sendpartial=
mail.smtp.sasl.enable=
mail.smtp.sasl.mechanisms=
mail.smtp.sasl.authorizationid=
mail.smtp.sasl.realm=
mail.smtp.quitwait=
mail.smtp.reportsuccess=
mail.smtp.socketFactory=
mail.smtp.socketFactory.class=
mail.smtp.socketFactory.fallback=
mail.smtp.socketFactory.port=
mail.smtp.ssl.enable=
mail.smtp.ssl.checkserveridentity=
mail.smtp.ssl.trust=
mail.smtp.ssl.socketFactory=
mail.smtp.ssl.socketFactory.class=
mail.smtp.ssl.socketFactory.port=
mail.smtp.ssl.protocols=
mail.smtp.starttls.enable=
mail.smtp.starttls.required=
mail.smtp.socks.host=
mail.smtp.socks.port=
mail.smtp.mailextension=
mail.smtp.userset=
// If you only need to monitor basic OS statistics you can use
// the sysresources command
sysresources
Total committed memory is : 7.9 GiB
Free Swap Space is : 0 B
Total Available Swap Space is : 0 B
Process CPU Time is : 812383000
Free Physical Memory Available is : 4.1 GiB
Total Physical Memory Available is : 16.0 GiB
System CPU Load is : 0.0
Process Load is : 0.0
% Free Physical Memory is : 0
// You could also parse a terminal command
clicmd vm_stat
// And return the results in local macros for further use
ret li
macros:
r(Pages_purged) : "1853972"
r(Mach_Virtual_Memo
ry_Statistics) : "bytes"
r(File) : "202582"
r(Swapins) : "0"
r(Pages_purgeable) : "284216"
r(Pages_occupied_by
_compressor) : "867800"
r(Pages_stored_in_c
ompressor) : "1619871"
r(Pages_active) : "1340421"
r(Pageouts) : "753"
r(Swapouts) : "0"
r(Compressions) : "7624001"
r(Pages_reactivated) : "1366631"
r(Pages_copy) : "11097418"
r(Pages_wired_down) : "504969"
r(Pageins) : "4262570"
r(Anonymous_pages) : "1276410"
r(Pages_inactive) : "56581"
r(Pages_zero_filled) : "119304436"
r(Pages_free) : "1340925"
r(Pages_throttled) : "0"
r(Decompressions) : "4826559"
r(Pages_speculative) : "81990"
r(raw) : "Mach Virtual Memory Statistics: (page size of 4096 bytes)Pages free: 1340925.Pages active: .."
r(clgroups) : "2, 4"
r(pgroups) : "2, 4"
r(cleaner) : "(^([\w _]{1,}+)(\W{1,}.*)$)"
r(parser) : "((.*)(\s{1,}+)(.*))"
// Or parse a much more complex shell command and return the values
// The advantage here is that we aren't redirecting from stdout to disk
// and then parsing the contents from disk, but are handling everything
// in memory (at least to the best of my knowledge)
clicmd "/bin/sh -c ps -x | grep StataMP.app", d
-------------------------------------------------------------------------------------
Key Value
-------------------------------------------------------------------------------------
parser .*
cleaner .*
pgroups 0, 0
clgroups 0, 0
exec /bin/sh -c ps -x | grep StataMP.app
line3 7056 ttys000 0:00.36 stata-mp
line2 6869 ttys000 0:00.08 /bin/bash --login
line1 PID TTY TIME CMD
-------------------------------------------------------------------------------------
// Serialize a couple records as JSON objects
. jsonio out in 73/74, what(record)
[{
"mpg" : 25.0,
"price" : 6850.0,
"headroom" : 2.0,
"rep78" : 4.0,
"length" : 156.0,
"weight" : 1990.0,
"displacement" : 97.0,
"turn" : 36.0,
"trunk" : 16.0,
"make" : "VW Scirocco",
"gear_ratio" : 3.7799999713897705,
"foreign" : 1.0
},
{
"mpg" : 17.0,
"price" : 11995.0,
"headroom" : 2.5,
"rep78" : 5.0,
"length" : 193.0,
"weight" : 3170.0,
"displacement" : 163.0,
"turn" : 37.0,
"trunk" : 14.0,
"make" : "Volvo 260",
"gear_ratio" : 2.9800000190734863,
"foreign" : 1.0
}]
// There are still some challenges with data ingest, but definitely a fair
// amount of support for reading the data into different structures for
// further manipulation:
// Load data from Google Maps Directions API and query a subset of the
// elements to load based on attribute names
jsonio kv, file("~/Desktop/waypointsResponse.json") nourl ///
elem("(legs_[0-9]/((start)|(end))_location/((lat)|(lng)))")
// Describe the data set in memory
desc
Contains data
obs: 12
vars: 2
size: 624
----------------------------------------------------------------------------------------------------------------------------------------------------------------
storage display value
variable name type format label variable label
----------------------------------------------------------------------------------------------------------------------------------------------------------------
key str44 %44s
value double %10.0g
----------------------------------------------------------------------------------------------------------------------------------------------------------------
Sorted by:
Note: Dataset has changed since last saved.
// Show the data
li, fast sep(0)
+-----------------------------------------------------------+
| key value |
|-----------------------------------------------------------|
1. | /routes_1/legs_1/end_location/lat 42.378175 |
2. | /routes_1/legs_1/end_location/lng -71.060226 |
3. | /routes_1/legs_1/start_location/lat 42.359824 |
4. | /routes_1/legs_1/start_location/lng -71.059812 |
5. | /routes_1/legs_2/end_location/lat 42.442609 |
6. | /routes_1/legs_2/end_location/lng -71.229336 |
7. | /routes_1/legs_2/start_location/lat 42.378175 |
8. | /routes_1/legs_2/start_location/lng -71.060226 |
9. | /routes_1/legs_3/end_location/lat 42.460387 |
10. | /routes_1/legs_3/end_location/lng -71.348931 |
11. | /routes_1/legs_3/start_location/lat 42.442609 |
12. | /routes_1/legs_3/start_location/lng -71.229336 |
+-----------------------------------------------------------+
// Clear data from memory
clear
// Although still very early in development, also some support for
// loading the JSON data as a table (test data provided by J. Canner)
jsonio table, nourl file(cannerTest.json)
desc
Contains data
obs: 10
vars: 10
size: 120,070
--------------------------------------------------------------------------
storage display value
variable name type format label variable label
--------------------------------------------------------------------------
asin str2000 %2000s asin
helpful1 byte %10.0g
helpful2 byte %10.0g
overall byte %10.0g overall
reviewText str2000 %2000s reviewText
reviewTime str2000 %2000s reviewTime
reviewerID str2000 %2000s reviewerID
reviewerName str2000 %2000s reviewerName
summary str2000 %2000s summary
unixReviewTime long %10.0g unixReviewTime
--------------------------------------------------------------------------
Sorted by:
Note: Dataset has changed since last saved.