"
]
},
- "execution_count": 7,
+ "execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@@ -389,16 +321,16 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "['datastax:spark-cassandra-connector:1.6.1-s_2.10']"
+ "['com.databricks:spark-avro_2.11:4.0.0']"
]
},
- "execution_count": 8,
+ "execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@@ -416,7 +348,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -602,67 +534,99 @@
" \n",
" \n",
" \n",
- " Optim'us \n",
+ " Optim'us\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 28 \n",
+ " 28\n",
+ " \n",
" | \n",
" \n",
" \n",
- " Leader \n",
+ " Leader\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 10 \n",
+ " 10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 5000000 \n",
+ " 5000000\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 4.300000190734863 \n",
+ " 4.300000190734863\n",
+ " \n",
" | \n",
" \n",
" \n",
- " ['Inochi',⸱'Convoy'] \n",
+ " ['Inochi',⋅'Convoy']\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 19.442735,-99.201111 \n",
+ " 19.442735,-99.201111\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1980/04/10 \n",
+ " 1980/04/10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2016/09/10 \n",
+ " 2016/09/10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " [8.53439998626709,⸱4300.0] \n",
+ " [8.53439998626709,⋅4300.0]\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2016-09-10 \n",
+ " 2016-09-10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2014-06-24⸱00:00:00 \n",
+ " 2014-06-24⋅00:00:00\n",
+ " \n",
" | \n",
" \n",
" \n",
- " True \n",
+ " True\n",
+ " \n",
" | \n",
" \n",
" \n",
- " bytearray(b'Leader') \n",
+ " None\n",
+ " \n",
" | \n",
" \n",
" \n",
- " None \n",
+ " None\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -670,67 +634,99 @@
" \n",
" \n",
" \n",
- " bumbl#ebéé⸱⸱ \n",
+ " bumbl#ebéé⋅⋅\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 17 \n",
+ " 17\n",
+ " \n",
" | \n",
" \n",
" \n",
- " Espionage \n",
+ " Espionage\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 7 \n",
+ " 7\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 5000000 \n",
+ " 5000000\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2.0 \n",
+ " 2.0\n",
+ " \n",
" | \n",
" \n",
" \n",
- " ['Bumble',⸱'Goldback'] \n",
+ " ['Bumble',⋅'Goldback']\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 10.642707,-71.612534 \n",
+ " 10.642707,-71.612534\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1980/04/10 \n",
+ " 1980/04/10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2015/08/10 \n",
+ " 2015/08/10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " [5.334000110626221,⸱2000.0] \n",
+ " [5.334000110626221,⋅2000.0]\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2015-08-10 \n",
+ " 2015-08-10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2014-06-24⸱00:00:00 \n",
+ " 2014-06-24⋅00:00:00\n",
+ " \n",
" | \n",
" \n",
" \n",
- " True \n",
+ " True\n",
+ " \n",
" | \n",
" \n",
" \n",
- " bytearray(b'Espionage') \n",
+ " None\n",
+ " \n",
" | \n",
" \n",
" \n",
- " None \n",
+ " None\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -738,67 +734,99 @@
" \n",
" \n",
" \n",
- " ironhide& \n",
+ " ironhide&\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 26 \n",
+ " 26\n",
+ " \n",
" | \n",
" \n",
" \n",
- " Security \n",
+ " Security\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 7 \n",
+ " 7\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 5000000 \n",
+ " 5000000\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 4.0 \n",
+ " 4.0\n",
+ " \n",
" | \n",
" \n",
" \n",
- " ['Roadbuster'] \n",
+ " ['Roadbuster']\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 37.789563,-122.400356 \n",
+ " 37.789563,-122.400356\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1980/04/10 \n",
+ " 1980/04/10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2014/07/10 \n",
+ " 2014/07/10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " [7.924799919128418,⸱4000.0] \n",
+ " [7.924799919128418,⋅4000.0]\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2014-06-24 \n",
+ " 2014-06-24\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2014-06-24⸱00:00:00 \n",
+ " 2014-06-24⋅00:00:00\n",
+ " \n",
" | \n",
" \n",
" \n",
- " True \n",
+ " True\n",
+ " \n",
" | \n",
" \n",
" \n",
- " bytearray(b'Security') \n",
+ " None\n",
+ " \n",
" | \n",
" \n",
" \n",
- " None \n",
+ " None\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -806,67 +834,99 @@
" \n",
" \n",
" \n",
- " Jazz \n",
+ " Jazz\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 13 \n",
+ " 13\n",
+ " \n",
" | \n",
" \n",
" \n",
- " First⸱Lieutenant \n",
+ " First⋅Lieutenant\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 8 \n",
+ " 8\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 5000000 \n",
+ " 5000000\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1.7999999523162842 \n",
+ " 1.7999999523162842\n",
+ " \n",
" | \n",
" \n",
" \n",
- " ['Meister'] \n",
+ " ['Meister']\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 33.670666,-117.841553 \n",
+ " 33.670666,-117.841553\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1980/04/10 \n",
+ " 1980/04/10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2013/06/10 \n",
+ " 2013/06/10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " [3.962399959564209,⸱1800.0] \n",
+ " [3.962399959564209,⋅1800.0]\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2013-06-24 \n",
+ " 2013-06-24\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2014-06-24⸱00:00:00 \n",
+ " 2014-06-24⋅00:00:00\n",
+ " \n",
" | \n",
" \n",
" \n",
- " True \n",
+ " True\n",
+ " \n",
" | \n",
" \n",
" \n",
- " bytearray(b'First⸱Lieutenant') \n",
+ " None\n",
+ " \n",
" | \n",
" \n",
" \n",
- " None \n",
+ " None\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -874,67 +934,99 @@
" \n",
" \n",
" \n",
- " Megatron \n",
+ " Megatron\n",
+ " \n",
" | \n",
" \n",
" \n",
- " None \n",
+ " None\n",
+ " \n",
" | \n",
" \n",
" \n",
- " None \n",
+ " None\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 10 \n",
+ " 10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 5000000 \n",
+ " 5000000\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 5.699999809265137 \n",
+ " 5.699999809265137\n",
+ " \n",
" | \n",
" \n",
" \n",
- " ['Megatron'] \n",
+ " ['Megatron']\n",
+ " \n",
" | \n",
" \n",
" \n",
- " None \n",
+ " None\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1980/04/10 \n",
+ " 1980/04/10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2012/05/10 \n",
+ " 2012/05/10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " [None,⸱5700.0] \n",
+ " [None,⋅5700.0]\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2012-05-10 \n",
+ " 2012-05-10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2014-06-24⸱00:00:00 \n",
+ " 2014-06-24⋅00:00:00\n",
+ " \n",
" | \n",
" \n",
" \n",
- " True \n",
+ " True\n",
+ " \n",
" | \n",
" \n",
" \n",
- " bytearray(b'None') \n",
+ " None\n",
+ " \n",
" | \n",
" \n",
" \n",
- " None \n",
+ " None\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -942,67 +1034,99 @@
" \n",
" \n",
" \n",
- " Metroplex_)^$ \n",
+ " Metroplex_)^$\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 300 \n",
+ " 300\n",
+ " \n",
" | \n",
" \n",
" \n",
- " Battle⸱Station \n",
+ " Battle⋅Station\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 8 \n",
+ " 8\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 5000000 \n",
+ " 5000000\n",
+ " \n",
" | \n",
" \n",
" \n",
- " None \n",
+ " None\n",
+ " \n",
" | \n",
" \n",
" \n",
- " ['Metroflex'] \n",
+ " ['Metroflex']\n",
+ " \n",
" | \n",
" \n",
" \n",
- " None \n",
+ " None\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1980/04/10 \n",
+ " 1980/04/10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2011/04/10 \n",
+ " 2011/04/10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " [91.44000244140625,⸱None] \n",
+ " [91.44000244140625,⋅None]\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2011-04-10 \n",
+ " 2011-04-10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2014-06-24⸱00:00:00 \n",
+ " 2014-06-24⋅00:00:00\n",
+ " \n",
" | \n",
" \n",
" \n",
- " True \n",
+ " True\n",
+ " \n",
" | \n",
" \n",
" \n",
- " bytearray(b'Battle⸱Station') \n",
+ " None\n",
+ " \n",
" | \n",
" \n",
" \n",
- " None \n",
+ " None\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -1078,7 +1202,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -1102,10 +1226,33 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"metadata": {
"scrolled": true
},
+ "outputs": [],
+ "source": [
+    "op.append([df, df], like=\"rows\").table()"
+ ]
+ },
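
The cell added above appends a dataframe to itself row-wise. A minimal sketch of the equivalent operation in plain PySpark (an assumption about what appending "like rows" amounts to, not Optimus's exact implementation):

```python
from functools import reduce

from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local").getOrCreate()
df = spark.createDataFrame([(1, "a"), (2, "b")], ["id", "letter"])

# Appending "like rows" is a union of dataframes with identical schemas.
appended = reduce(lambda a, b: a.union(b), [df, df])
appended.show()  # four rows: the original two, twice
```

`union` matches columns positionally, which is why the example reuses the same dataframe; mismatched schemas would need realigning first.
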
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## IO Operations"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Load from file"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
"outputs": [
{
"data": {
@@ -1115,7 +1262,7 @@
"\n",
"\n",
"\n",
- "Viewing 12 of 12 rows / 16 columns
\n",
+ "Viewing 5 of 19 rows / 8 columns
\n",
"1 partition(s)
\n",
"\n",
"\n",
@@ -1123,8 +1270,8 @@
" \n",
" \n",
" \n",
- " names \n",
- " 1 (string) \n",
+ " id \n",
+ " 1 (int) \n",
" \n",
" \n",
" nullable\n",
@@ -1133,836 +1280,128 @@
" | \n",
" \n",
" \n",
- " height(ft) \n",
- " 2 (int) \n",
- " \n",
- " \n",
- " nullable\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " \n",
- " function \n",
- " 3 (string) \n",
- " \n",
- " \n",
- " nullable\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " \n",
- " rank \n",
- " 4 (int) \n",
- " \n",
- " \n",
- " nullable\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " \n",
- " age \n",
- " 5 (int) \n",
- " \n",
- " \n",
- " nullable\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " \n",
- " weight(t) \n",
- " 6 (float) \n",
- " \n",
- " \n",
- " nullable\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " \n",
- " japanese name \n",
- " 7 (array<string>) \n",
- " \n",
- " \n",
- " nullable\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " \n",
- " last position seen \n",
- " 8 (string) \n",
- " \n",
- " \n",
- " nullable\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " \n",
- " date arrival \n",
- " 9 (string) \n",
- " \n",
- " \n",
- " nullable\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " \n",
- " last date seen \n",
- " 10 (string) \n",
- " \n",
- " \n",
- " nullable\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " \n",
- " attributes \n",
- " 11 (array<float>) \n",
- " \n",
- " \n",
- " nullable\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " \n",
- " DateType \n",
- " 12 (date) \n",
- " \n",
- " \n",
- " nullable\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " \n",
- " Tiemstamp \n",
- " 13 (timestamp) \n",
- " \n",
- " \n",
- " nullable\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " \n",
- " Cybertronian \n",
- " 14 (boolean) \n",
- " \n",
- " \n",
- " nullable\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " \n",
- " function(binary) \n",
- " 15 (binary) \n",
- " \n",
- " \n",
- " nullable\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " \n",
- " NullType \n",
- " 16 (null) \n",
- " \n",
- " \n",
- " nullable\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- "
\n",
- "\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " Optim'us \n",
- " | \n",
- " \n",
- " \n",
- " 28 \n",
- " | \n",
- " \n",
- " \n",
- " Leader \n",
- " | \n",
- " \n",
- " \n",
- " 10 \n",
- " | \n",
- " \n",
- " \n",
- " 5000000 \n",
- " | \n",
- " \n",
- " \n",
- " 4.300000190734863 \n",
- " | \n",
- " \n",
- " \n",
- " ['Inochi',⸱'Convoy'] \n",
- " | \n",
- " \n",
- " \n",
- " 19.442735,-99.201111 \n",
- " | \n",
- " \n",
- " \n",
- " 1980/04/10 \n",
- " | \n",
- " \n",
- " \n",
- " 2016/09/10 \n",
- " | \n",
- " \n",
- " \n",
- " [8.53439998626709,⸱4300.0] \n",
- " | \n",
- " \n",
- " \n",
- " 2016-09-10 \n",
- " | \n",
- " \n",
- " \n",
- " 2014-06-24⸱00:00:00 \n",
- " | \n",
- " \n",
- " \n",
- " True \n",
- " | \n",
- " \n",
- " \n",
- " bytearray(b'Leader') \n",
- " | \n",
- " \n",
- " \n",
- " None \n",
- " | \n",
- " \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " bumbl#ebéé⸱⸱ \n",
- " | \n",
- " \n",
- " \n",
- " 17 \n",
- " | \n",
- " \n",
- " \n",
- " Espionage \n",
- " | \n",
- " \n",
- " \n",
- " 7 \n",
- " | \n",
- " \n",
- " \n",
- " 5000000 \n",
- " | \n",
- " \n",
- " \n",
- " 2.0 \n",
- " | \n",
- " \n",
- " \n",
- " ['Bumble',⸱'Goldback'] \n",
- " | \n",
- " \n",
- " \n",
- " 10.642707,-71.612534 \n",
- " | \n",
- " \n",
- " \n",
- " 1980/04/10 \n",
- " | \n",
- " \n",
- " \n",
- " 2015/08/10 \n",
- " | \n",
- " \n",
- " \n",
- " [5.334000110626221,⸱2000.0] \n",
- " | \n",
- " \n",
- " \n",
- " 2015-08-10 \n",
- " | \n",
- " \n",
- " \n",
- " 2014-06-24⸱00:00:00 \n",
- " | \n",
- " \n",
- " \n",
- " True \n",
- " | \n",
- " \n",
- " \n",
- " bytearray(b'Espionage') \n",
- " | \n",
- " \n",
- " \n",
- " None \n",
- " | \n",
- " \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " ironhide& \n",
- " | \n",
- " \n",
- " \n",
- " 26 \n",
- " | \n",
- " \n",
- " \n",
- " Security \n",
- " | \n",
- " \n",
- " \n",
- " 7 \n",
- " | \n",
- " \n",
- " \n",
- " 5000000 \n",
- " | \n",
- " \n",
- " \n",
- " 4.0 \n",
- " | \n",
- " \n",
- " \n",
- " ['Roadbuster'] \n",
- " | \n",
- " \n",
- " \n",
- " 37.789563,-122.400356 \n",
- " | \n",
- " \n",
- " \n",
- " 1980/04/10 \n",
- " | \n",
- " \n",
- " \n",
- " 2014/07/10 \n",
- " | \n",
- " \n",
- " \n",
- " [7.924799919128418,⸱4000.0] \n",
- " | \n",
- " \n",
- " \n",
- " 2014-06-24 \n",
- " | \n",
- " \n",
- " \n",
- " 2014-06-24⸱00:00:00 \n",
- " | \n",
- " \n",
- " \n",
- " True \n",
- " | \n",
- " \n",
- " \n",
- " bytearray(b'Security') \n",
- " | \n",
- " \n",
- " \n",
- " None \n",
- " | \n",
- " \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " Jazz \n",
- " | \n",
- " \n",
- " \n",
- " 13 \n",
- " | \n",
- " \n",
- " \n",
- " First⸱Lieutenant \n",
- " | \n",
- " \n",
- " \n",
- " 8 \n",
- " | \n",
- " \n",
- " \n",
- " 5000000 \n",
- " | \n",
- " \n",
- " \n",
- " 1.7999999523162842 \n",
- " | \n",
- " \n",
- " \n",
- " ['Meister'] \n",
- " | \n",
- " \n",
- " \n",
- " 33.670666,-117.841553 \n",
- " | \n",
- " \n",
- " \n",
- " 1980/04/10 \n",
- " | \n",
- " \n",
- " \n",
- " 2013/06/10 \n",
- " | \n",
- " \n",
- " \n",
- " [3.962399959564209,⸱1800.0] \n",
- " | \n",
- " \n",
- " \n",
- " 2013-06-24 \n",
- " | \n",
- " \n",
- " \n",
- " 2014-06-24⸱00:00:00 \n",
- " | \n",
- " \n",
- " \n",
- " True \n",
- " | \n",
- " \n",
- " \n",
- " bytearray(b'First⸱Lieutenant') \n",
- " | \n",
- " \n",
- " \n",
- " None \n",
- " | \n",
- " \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " Megatron \n",
- " | \n",
- " \n",
- " \n",
- " None \n",
- " | \n",
- " \n",
- " \n",
- " None \n",
- " | \n",
- " \n",
- " \n",
- " 10 \n",
- " | \n",
- " \n",
- " \n",
- " 5000000 \n",
- " | \n",
- " \n",
- " \n",
- " 5.699999809265137 \n",
- " | \n",
- " \n",
- " \n",
- " ['Megatron'] \n",
- " | \n",
- " \n",
- " \n",
- " None \n",
- " | \n",
- " \n",
- " \n",
- " 1980/04/10 \n",
- " | \n",
- " \n",
- " \n",
- " 2012/05/10 \n",
- " | \n",
- " \n",
- " \n",
- " [None,⸱5700.0] \n",
- " | \n",
- " \n",
- " \n",
- " 2012-05-10 \n",
- " | \n",
- " \n",
- " \n",
- " 2014-06-24⸱00:00:00 \n",
- " | \n",
- " \n",
- " \n",
- " True \n",
- " | \n",
- " \n",
- " \n",
- " bytearray(b'None') \n",
- " | \n",
- " \n",
- " \n",
- " None \n",
- " | \n",
- " \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " Metroplex_)^$ \n",
- " | \n",
- " \n",
- " \n",
- " 300 \n",
- " | \n",
- " \n",
- " \n",
- " Battle⸱Station \n",
- " | \n",
- " \n",
- " \n",
- " 8 \n",
- " | \n",
- " \n",
- " \n",
- " 5000000 \n",
- " | \n",
- " \n",
- " \n",
- " None \n",
- " | \n",
- " \n",
- " \n",
- " ['Metroflex'] \n",
- " | \n",
- " \n",
- " \n",
- " None \n",
- " | \n",
- " \n",
- " \n",
- " 1980/04/10 \n",
- " | \n",
- " \n",
- " \n",
- " 2011/04/10 \n",
- " | \n",
- " \n",
- " \n",
- " [91.44000244140625,⸱None] \n",
- " | \n",
- " \n",
- " \n",
- " 2011-04-10 \n",
- " | \n",
- " \n",
- " \n",
- " 2014-06-24⸱00:00:00 \n",
- " | \n",
- " \n",
- " \n",
- " True \n",
- " | \n",
- " \n",
- " \n",
- " bytearray(b'Battle⸱Station') \n",
- " | \n",
- " \n",
- " \n",
- " None \n",
- " | \n",
- " \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " Optim'us \n",
- " | \n",
- " \n",
- " \n",
- " 28 \n",
- " | \n",
- " \n",
- " \n",
- " Leader \n",
- " | \n",
- " \n",
- " \n",
- " 10 \n",
- " | \n",
- " \n",
- " \n",
- " 5000000 \n",
- " | \n",
- " \n",
- " \n",
- " 4.300000190734863 \n",
- " | \n",
- " \n",
- " \n",
- " ['Inochi',⸱'Convoy'] \n",
- " | \n",
- " \n",
- " \n",
- " 19.442735,-99.201111 \n",
- " | \n",
- " \n",
- " \n",
- " 1980/04/10 \n",
- " | \n",
- " \n",
- " \n",
- " 2016/09/10 \n",
- " | \n",
- " \n",
- " \n",
- " [8.53439998626709,⸱4300.0] \n",
- " | \n",
- " \n",
- " \n",
- " 2016-09-10 \n",
- " | \n",
- " \n",
- " \n",
- " 2014-06-24⸱00:00:00 \n",
- " | \n",
- " \n",
- " \n",
- " True \n",
- " | \n",
- " \n",
- " \n",
- " bytearray(b'Leader') \n",
- " | \n",
- " \n",
- " \n",
- " None \n",
- " | \n",
- " \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " bumbl#ebéé⸱⸱ \n",
- " | \n",
- " \n",
- " \n",
- " 17 \n",
- " | \n",
- " \n",
- " \n",
- " Espionage \n",
- " | \n",
- " \n",
- " \n",
- " 7 \n",
- " | \n",
- " \n",
- " \n",
- " 5000000 \n",
- " | \n",
- " \n",
- " \n",
- " 2.0 \n",
- " | \n",
- " \n",
- " \n",
- " ['Bumble',⸱'Goldback'] \n",
- " | \n",
- " \n",
- " \n",
- " 10.642707,-71.612534 \n",
- " | \n",
- " \n",
- " \n",
- " 1980/04/10 \n",
- " | \n",
- " \n",
- " \n",
- " 2015/08/10 \n",
- " | \n",
- " \n",
- " \n",
- " [5.334000110626221,⸱2000.0] \n",
- " | \n",
- " \n",
- " \n",
- " 2015-08-10 \n",
- " | \n",
- " \n",
- " \n",
- " 2014-06-24⸱00:00:00 \n",
- " | \n",
- " \n",
- " \n",
- " True \n",
- " | \n",
- " \n",
- " \n",
- " bytearray(b'Espionage') \n",
- " | \n",
- " \n",
- " \n",
- " None \n",
- " | \n",
- " \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " ironhide& \n",
- " | \n",
- " \n",
- " \n",
- " 26 \n",
- " | \n",
- " \n",
- " \n",
- " Security \n",
- " | \n",
- " \n",
- " \n",
- " 7 \n",
- " | \n",
- " \n",
- " \n",
- " 5000000 \n",
- " | \n",
- " \n",
- " \n",
- " 4.0 \n",
- " | \n",
- " \n",
- " \n",
- " ['Roadbuster'] \n",
- " | \n",
- " \n",
- " \n",
- " 37.789563,-122.400356 \n",
- " | \n",
- " \n",
- " \n",
- " 1980/04/10 \n",
- " | \n",
- " \n",
- " \n",
- " 2014/07/10 \n",
- " | \n",
+ " firstName
\n",
+ " 2 (string)
\n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ "
\n",
+ " \n",
" \n",
- " \n",
- " [7.924799919128418,⸱4000.0] \n",
- " | \n",
+ " \n",
+ " lastName \n",
+ " 3 (string) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
" \n",
- " \n",
- " 2014-06-24 \n",
- " | \n",
+ " \n",
+ " billingId \n",
+ " 4 (int) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
" \n",
- " \n",
- " 2014-06-24⸱00:00:00 \n",
- " | \n",
+ " \n",
+ " product \n",
+ " 5 (string) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
" \n",
- " \n",
- " True \n",
- " | \n",
+ " \n",
+ " price \n",
+ " 6 (int) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
" \n",
- " \n",
- " bytearray(b'Security') \n",
- " | \n",
+ " \n",
+ " birth \n",
+ " 7 (string) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
" \n",
- " \n",
- " None \n",
- " | \n",
+ " \n",
+ " dummyCol \n",
+ " 8 (string) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
" \n",
"
\n",
+ "\n",
+ " \n",
+ " \n",
" \n",
" \n",
" \n",
" \n",
- " Jazz \n",
- " | \n",
- " \n",
- " \n",
- " 13 \n",
- " | \n",
- " \n",
- " \n",
- " First⸱Lieutenant \n",
- " | \n",
- " \n",
- " \n",
- " 8 \n",
- " | \n",
- " \n",
- " \n",
- " 5000000 \n",
- " | \n",
- " \n",
- " \n",
- " 1.7999999523162842 \n",
- " | \n",
- " \n",
- " \n",
- " ['Meister'] \n",
- " | \n",
- " \n",
- " \n",
- " 33.670666,-117.841553 \n",
- " | \n",
- " \n",
- " \n",
- " 1980/04/10 \n",
+ " 1\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2013/06/10 \n",
+ " Luis\n",
+ " \n",
" | \n",
" \n",
" \n",
- " [3.962399959564209,⸱1800.0] \n",
+ " Alvarez$$%!\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2013-06-24 \n",
+ " 123\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2014-06-24⸱00:00:00 \n",
+ " Cake\n",
+ " \n",
" | \n",
" \n",
" \n",
- " True \n",
+ " 10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " bytearray(b'First⸱Lieutenant') \n",
+ " 1980/07/07\n",
+ " \n",
" | \n",
" \n",
" \n",
- " None \n",
+ " never\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -1970,67 +1409,103 @@
" \n",
" \n",
" \n",
- " Megatron \n",
+ " 2\n",
+ " \n",
" | \n",
" \n",
" \n",
- " None \n",
+ " André\n",
+ " \n",
" | \n",
" \n",
" \n",
- " None \n",
+ " Ampère\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 10 \n",
+ " 423\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 5000000 \n",
+ " piza\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 5.699999809265137 \n",
+ " 8\n",
+ " \n",
" | \n",
" \n",
" \n",
- " ['Megatron'] \n",
+ " 1950/07/08\n",
+ " \n",
" | \n",
" \n",
" \n",
- " None \n",
+ " gonna\n",
+ " \n",
" | \n",
" \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
" \n",
- " 1980/04/10 \n",
+ " 3\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2012/05/10 \n",
+ " NiELS\n",
+ " \n",
" | \n",
" \n",
" \n",
- " [None,⸱5700.0] \n",
+ " Böhr//((%%\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2012-05-10 \n",
+ " 551\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2014-06-24⸱00:00:00 \n",
+ " pizza\n",
+ " \n",
" | \n",
" \n",
" \n",
- " True \n",
+ " 8\n",
+ " \n",
" | \n",
" \n",
" \n",
- " bytearray(b'None') \n",
+ " 1990/07/09\n",
+ " \n",
" | \n",
" \n",
" \n",
- " None \n",
+ " give\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -2038,67 +1513,103 @@
" \n",
" \n",
" \n",
- " Metroplex_)^$ \n",
+ " 4\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 300 \n",
+ " PAUL\n",
+ " \n",
" | \n",
" \n",
" \n",
- " Battle⸱Station \n",
+ " dirac$\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 8 \n",
+ " 521\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 5000000 \n",
+ " pizza\n",
+ " \n",
" | \n",
" \n",
" \n",
- " None \n",
+ " 8\n",
+ " \n",
" | \n",
" \n",
" \n",
- " ['Metroflex'] \n",
+ " 1954/07/10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " None \n",
+ " you\n",
+ " \n",
" | \n",
" \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
" \n",
- " 1980/04/10 \n",
+ " 5\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2011/04/10 \n",
+ " Albert\n",
+ " \n",
" | \n",
" \n",
" \n",
- " [91.44000244140625,⸱None] \n",
+ " Einstein\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2011-04-10 \n",
+ " 634\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2014-06-24⸱00:00:00 \n",
+ " pizza\n",
+ " \n",
" | \n",
" \n",
" \n",
- " True \n",
+ " 8\n",
+ " \n",
" | \n",
" \n",
" \n",
- " bytearray(b'Battle⸱Station') \n",
+ " 1990/07/11\n",
+ " \n",
" | \n",
" \n",
" \n",
- " None \n",
+ " up\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -2107,7 +1618,7 @@
"
\n",
"\n",
"\n",
- "Viewing 12 of 12 rows / 16 columns
\n",
+ "Viewing 5 of 19 rows / 8 columns
\n",
"1 partition(s)
\n"
],
"text/plain": [
@@ -2119,26 +1630,12 @@
}
],
"source": [
-    "op.append([df, df], like=\"rows\").table()"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## IO Operations"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Load from file"
+    "df_csv = op.load.csv(\"data/foo.csv\").table(5)"
]
},
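
`op.load.csv` presumably delegates to Spark's CSV reader. A hedged sketch of the underlying call that would produce the header names and inferred int/string types shown in the output above (a sketch under that assumption, not Optimus's exact implementation):

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local").getOrCreate()

# Header and schema inference yield the int/string columns shown above.
df_csv = (spark.read
          .option("header", "true")
          .option("inferSchema", "true")
          .csv("data/foo.csv"))
df_csv.show(5)
```
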
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -2149,7 +1646,7 @@
"\n",
"\n",
"\n",
- "Viewing 5 of 19 rows / 8 columns
\n",
+ "Viewing 4 of 4 rows / 5 columns
\n",
"1 partition(s)
\n",
"\n",
"\n",
@@ -2157,8 +1654,8 @@
" \n",
" \n",
" \n",
- " id \n",
- " 1 (int) \n",
+ " Sepal length \n",
+ " 1 (string) \n",
" \n",
" \n",
" nullable\n",
@@ -2167,8 +1664,8 @@
" | \n",
" \n",
" \n",
- " firstName \n",
- " 2 (string) \n",
+ " Sepal width \n",
+ " 2 (double) \n",
" \n",
" \n",
" nullable\n",
@@ -2177,8 +1674,8 @@
" | \n",
" \n",
" \n",
- " lastName \n",
- " 3 (string) \n",
+ " Petal length \n",
+ " 3 (double) \n",
" \n",
" \n",
" nullable\n",
@@ -2187,8 +1684,8 @@
" | \n",
" \n",
" \n",
- " billingId \n",
- " 4 (int) \n",
+ " Petal width \n",
+ " 4 (string) \n",
" \n",
" \n",
" nullable\n",
@@ -2197,118 +1694,50 @@
" | \n",
" \n",
" \n",
- " product \n",
+ " Species \n",
" 5 (string) \n",
" \n",
" \n",
" nullable\n",
" \n",
- " \n",
- " | \n",
- " \n",
- " \n",
- " price \n",
- " 6 (int) \n",
- " \n",
- " \n",
- " nullable\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " \n",
- " birth \n",
- " 7 (string) \n",
- " \n",
- " \n",
- " nullable\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- " \n",
- " dummyCol \n",
- " 8 (string) \n",
- " \n",
- " \n",
- " nullable\n",
- " \n",
- " \n",
- " | \n",
- " \n",
- "
\n",
- "\n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 1 \n",
- " | \n",
- " \n",
- " \n",
- " Luis \n",
- " | \n",
- " \n",
- " \n",
- " Alvarez$$%! \n",
- " | \n",
- " \n",
- " \n",
- " 123 \n",
- " | \n",
- " \n",
- " \n",
- " Cake \n",
- " | \n",
- " \n",
- " \n",
- " 10 \n",
- " | \n",
- " \n",
- " \n",
- " 1980/07/07 \n",
- " | \n",
- " \n",
- " \n",
- " never \n",
- " | \n",
+ " \n",
+ " \n",
" \n",
"
\n",
+ "\n",
+ " \n",
+ " \n",
" \n",
" \n",
" \n",
" \n",
- " 2 \n",
- " | \n",
- " \n",
- " \n",
- " André \n",
- " | \n",
- " \n",
- " \n",
- " Ampère \n",
- " | \n",
- " \n",
- " \n",
- " 423 \n",
+ " 5.0\n",
+ " \n",
" | \n",
" \n",
" \n",
- " piza \n",
+ " 3.6\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 8 \n",
+ " 1.4\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1950/07/08 \n",
+ " 0.2\n",
+ " \n",
" | \n",
" \n",
" \n",
- " gonna \n",
+ " I.⋅setosa\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -2316,35 +1745,33 @@
" \n",
" \n",
" \n",
- " 3 \n",
- " | \n",
- " \n",
- " \n",
- " NiELS \n",
- " | \n",
- " \n",
- " \n",
- " Böhr//((%% \n",
- " | \n",
- " \n",
- " \n",
- " 551 \n",
+ " 5.0\n",
+ " \n",
" | \n",
" \n",
" \n",
- " pizza \n",
+ " 3.6\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 8 \n",
+ " 1.4\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1990/07/09 \n",
+ " 0.2\n",
+ " \n",
" | \n",
" \n",
" \n",
- " give \n",
+ " I.⋅setosa\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -2352,35 +1779,33 @@
" \n",
" \n",
" \n",
- " 4 \n",
- " | \n",
- " \n",
- " \n",
- " PAUL \n",
- " | \n",
- " \n",
- " \n",
- " dirac$ \n",
- " | \n",
- " \n",
- " \n",
- " 521 \n",
+ " 5.0\n",
+ " \n",
" | \n",
" \n",
" \n",
- " pizza \n",
+ " 3.6\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 8 \n",
+ " 1.4\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1954/07/10 \n",
+ " 0.2\n",
+ " \n",
" | \n",
" \n",
" \n",
- " you \n",
+ " I.⋅setosa\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -2388,35 +1813,33 @@
" \n",
" \n",
" \n",
- " 5 \n",
- " | \n",
- " \n",
- " \n",
- " Albert \n",
- " | \n",
- " \n",
- " \n",
- " Einstein \n",
- " | \n",
- " \n",
- " \n",
- " 634 \n",
+ " 5.0\n",
+ " \n",
" | \n",
" \n",
" \n",
- " pizza \n",
+ " 3.6\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 8 \n",
+ " 1.4\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1990/07/11 \n",
+ " 0.2\n",
+ " \n",
" | \n",
" \n",
" \n",
- " up \n",
+ " I.⋅setosa\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -2425,7 +1848,7 @@
"
\n",
"\n",
"\n",
- "Viewing 5 of 19 rows / 8 columns
\n",
+ "Viewing 4 of 4 rows / 5 columns
\n",
"1 partition(s)
\n"
],
"text/plain": [
@@ -2437,12 +1860,12 @@
}
],
"source": [
-    "df_csv = op.load.csv(\"data/foo.csv\").table(5)"
+    "df_tsv = op.load.tsv(\"data/foo.tsv\").table(5)"
]
},
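
For the TSV variant the likely difference is only the separator; a minimal sketch under that assumption (that `op.load.tsv` forwards to the CSV reader with a tab delimiter):

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local").getOrCreate()

# Same reader as CSV, with a tab separator.
df_tsv = (spark.read
          .option("header", "true")
          .option("inferSchema", "true")
          .option("sep", "\t")
          .csv("data/foo.tsv"))
df_tsv.show(5)
```
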
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
@@ -2548,35 +1971,51 @@
" \n",
" \n",
" \n",
- " 123 \n",
+ " 123\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1980/07/07 \n",
+ " 1980/07/07\n",
+ " \n",
" | \n",
" \n",
" \n",
- " never \n",
+ " never\n",
+ " \n",
" | \n",
" \n",
" \n",
- " Luis \n",
+ " Luis\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1 \n",
+ " 1\n",
+ " \n",
" | \n",
" \n",
" \n",
- " Alvarez$$%! \n",
+ " Alvarez$$%!\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 10 \n",
+ " 10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " Cake \n",
+ " Cake\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -2584,35 +2023,51 @@
" \n",
" \n",
" \n",
- " 423 \n",
+ " 423\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1950/07/08 \n",
+ " 1950/07/08\n",
+ " \n",
" | \n",
" \n",
" \n",
- " gonna \n",
+ " gonna\n",
+ " \n",
" | \n",
" \n",
" \n",
- " André \n",
+ " André\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 2 \n",
+ " 2\n",
+ " \n",
" | \n",
" \n",
" \n",
- " Ampère \n",
+ " Ampère\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 8 \n",
+ " 8\n",
+ " \n",
" | \n",
" \n",
" \n",
- " piza \n",
+ " piza\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -2620,35 +2075,51 @@
" \n",
" \n",
" \n",
- " 551 \n",
+ " 551\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1990/07/09 \n",
+ " 1990/07/09\n",
+ " \n",
" | \n",
" \n",
" \n",
- " give \n",
+ " give\n",
+ " \n",
" | \n",
" \n",
" \n",
- " NiELS \n",
+ " NiELS\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 3 \n",
+ " 3\n",
+ " \n",
" | \n",
" \n",
" \n",
- " Böhr//((%% \n",
+ " Böhr//((%%\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 8 \n",
+ " 8\n",
+ " \n",
" | \n",
" \n",
" \n",
- " pizza \n",
+ " pizza\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -2656,35 +2127,51 @@
" \n",
" \n",
" \n",
- " 521 \n",
+ " 521\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1954/07/10 \n",
+ " 1954/07/10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " you \n",
+ " you\n",
+ " \n",
" | \n",
" \n",
" \n",
- " PAUL \n",
+ " PAUL\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 4 \n",
+ " 4\n",
+ " \n",
" | \n",
" \n",
" \n",
- " dirac$ \n",
+ " dirac$\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 8 \n",
+ " 8\n",
+ " \n",
" | \n",
" \n",
" \n",
- " pizza \n",
+ " pizza\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -2692,35 +2179,51 @@
" \n",
" \n",
" \n",
- " 634 \n",
+ " 634\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1990/07/11 \n",
+ " 1990/07/11\n",
+ " \n",
" | \n",
" \n",
" \n",
- " up \n",
+ " up\n",
+ " \n",
" | \n",
" \n",
" \n",
- " Albert \n",
+ " Albert\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 5 \n",
+ " 5\n",
+ " \n",
" | \n",
" \n",
" \n",
- " Einstein \n",
+ " Einstein\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 8 \n",
+ " 8\n",
+ " \n",
" | \n",
" \n",
" \n",
- " pizza \n",
+ " pizza\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -2746,7 +2249,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": 22,
"metadata": {},
"outputs": [
{
@@ -2852,35 +2355,51 @@
" \n",
" \n",
" \n",
- " 1 \n",
+ " 1\n",
+ " \n",
" | \n",
" \n",
" \n",
- " Luis \n",
+ " Luis\n",
+ " \n",
" | \n",
" \n",
" \n",
- " Alvarez$$%! \n",
+ " Alvarez$$%!\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 123 \n",
+ " 123\n",
+ " \n",
" | \n",
" \n",
" \n",
- " Cake \n",
+ " Cake\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 10 \n",
+ " 10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1980/07/07 \n",
+ " 1980/07/07\n",
+ " \n",
" | \n",
" \n",
" \n",
- " never \n",
+ " never\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -2888,35 +2407,51 @@
" \n",
" \n",
" \n",
- " 2 \n",
+ " 2\n",
+ " \n",
" | \n",
" \n",
" \n",
- " André \n",
+ " André\n",
+ " \n",
" | \n",
" \n",
" \n",
- " Ampère \n",
+ " Ampère\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 423 \n",
+ " 423\n",
+ " \n",
" | \n",
" \n",
" \n",
- " piza \n",
+ " piza\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 8 \n",
+ " 8\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1950/07/08 \n",
+ " 1950/07/08\n",
+ " \n",
" | \n",
" \n",
" \n",
- " gonna \n",
+ " gonna\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -2924,35 +2459,51 @@
" \n",
" \n",
" \n",
- " 3 \n",
+ " 3\n",
+ " \n",
" | \n",
" \n",
" \n",
- " NiELS \n",
+ " NiELS\n",
+ " \n",
" | \n",
" \n",
" \n",
- " Böhr//((%% \n",
+ " Böhr//((%%\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 551 \n",
+ " 551\n",
+ " \n",
" | \n",
" \n",
" \n",
- " pizza \n",
+ " pizza\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 8 \n",
+ " 8\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1990/07/09 \n",
+ " 1990/07/09\n",
+ " \n",
" | \n",
" \n",
" \n",
- " give \n",
+ " give\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -2960,35 +2511,51 @@
" \n",
" \n",
" \n",
- " 4 \n",
+ " 4\n",
+ " \n",
" | \n",
" \n",
" \n",
- " PAUL \n",
+ " PAUL\n",
+ " \n",
" | \n",
" \n",
" \n",
- " dirac$ \n",
+ " dirac$\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 521 \n",
+ " 521\n",
+ " \n",
" | \n",
" \n",
" \n",
- " pizza \n",
+ " pizza\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 8 \n",
+ " 8\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1954/07/10 \n",
+ " 1954/07/10\n",
+ " \n",
" | \n",
" \n",
" \n",
- " you \n",
+ " you\n",
+ " \n",
" | \n",
" \n",
"
\n",
@@ -2996,35 +2563,51 @@
" \n",
" \n",
" \n",
- " 5 \n",
+ " 5\n",
+ " \n",
" | \n",
" \n",
" \n",
- " Albert \n",
+ " Albert\n",
+ " \n",
" | \n",
" \n",
" \n",
- " Einstein \n",
+ " Einstein\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 634 \n",
+ " 634\n",
+ " \n",
" | \n",
" \n",
" \n",
- " pizza \n",
+ " pizza\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 8 \n",
+ " 8\n",
+ " \n",
" | \n",
" \n",
" \n",
- " 1990/07/11 \n",
+ " 1990/07/11\n",
+ " \n",
" | \n",
" \n",
" \n",
- " up \n",
+ " up\n",
+ " \n",
" | \n",
" \n",
"
\n",
diff --git a/examples/new-api-sandbox.ipynb b/examples/new-api-sandbox.ipynb
index 82cf173b9..25f289159 100644
--- a/examples/new-api-sandbox.ipynb
+++ b/examples/new-api-sandbox.ipynb
@@ -24,7 +24,19 @@
"cell_type": "code",
"execution_count": 3,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "C:\\Users\\argenisleon\\Anaconda3\\lib\\site-packages\\socks.py:58: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working\n",
+ " from collections import Callable\n",
+ "\n",
+ " You are using PySparkling of version 2.4.10, but your PySpark is of\n",
+ " version 2.3.1. Please make sure Spark and PySparkling versions are compatible. \n"
+ ]
+ }
+ ],
"source": [
"from optimus import Optimus"
]
@@ -41,10 +53,10 @@
"INFO:optimus:Just check that Spark and all necessary environments vars are present...\n",
"INFO:optimus:-----\n",
"INFO:optimus:SPARK_HOME=C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\n",
- "INFO:optimus:HADOOP_HOME=C:\\opt\\spark\\spark-2.3.1-bin-hadoop2.7\n",
+ "INFO:optimus:HADOOP_HOME=C:\\opt\\hadoop-2.7.7\n",
"INFO:optimus:PYSPARK_PYTHON=C:\\Users\\argenisleon\\Anaconda3\\python.exe\n",
"INFO:optimus:PYSPARK_DRIVER_PYTHON=jupyter\n",
- "INFO:optimus:PYSPARK_SUBMIT_ARGS=--conf \"spark.sql.catalogImplementation=hive\" pyspark-shell\n",
+ "INFO:optimus:PYSPARK_SUBMIT_ARGS=--packages com.databricks:spark-avro_2.11:4.0.0 --conf \"spark.sql.catalogImplementation=hive\" pyspark-shell\n",
"INFO:optimus:JAVA_HOME=C:\\java\n",
"INFO:optimus:Pyarrow Installed\n",
"INFO:optimus:-----\n",
@@ -105,46 +117,925 @@
" border-bottom: 1px solid #cccccc;\n",
" }\n",
"\n",
- " /* Profiler */\n",
- " .main{\n",
- " width:100%;\n",
- " overflow:auto;\n",
- " border-bottom:1px solid #eeeeee;\n",
- " padding: 10px 0;\n",
- " }\n",
- " .panel_profiler{\n",
- " margin-right:2%;\n",
- " float:left;\n",
- " padding-bottom:2%;\n",
- " }\n",
- " .panel_profiler tbody{\n",
- " font-family:monospace;\n",
- " }\n",
- " .title_profiler{\n",
- " padding:20px;\n",
- " background-color: #eeeeee\n",
- " }\n",
- " .info{\n",
- " overflow: auto\n",
- " }\n",
- " .main td, main th{\n",
- " padding:0em\n",
- " }\n",
- " .panel_profiler td {\n",
- " padding:0.2em\n",
- " }\n",
- " .none, .true{\n",
- " color:#0000ff\n",
- " }\n",
- " .optimus_table th {\n",
- " font-family:sans-serif;\n",
- " }\n",
+ " /* Profiler */\n",
+ " .main{\n",
+ " width:100%;\n",
+ " overflow:auto;\n",
+ " border-bottom:1px solid #eeeeee;\n",
+ " padding: 10px 0;\n",
+ " }\n",
+ " .panel_profiler{\n",
+ " margin-right:2%;\n",
+ " float:left;\n",
+ " padding-bottom:2%;\n",
+ " }\n",
+ " .panel_profiler tbody{\n",
+ " font-family:monospace;\n",
+ " }\n",
+ " .title_profiler{\n",
+ " padding:20px;\n",
+ " background-color: #eeeeee\n",
+ " }\n",
+ " .info{\n",
+ " overflow: auto\n",
+ " }\n",
+ " .main td, main th{\n",
+ " padding:0em\n",
+ " }\n",
+ " .panel_profiler td {\n",
+ " padding:0.2em\n",
+ " }\n",
+ " .none, .true{\n",
+ " color:#0000ff\n",
+ " }\n",
+ " .optimus_table th {\n",
+ " font-family:sans-serif;\n",
+ " }\n",
+ "\n",
+ " .info_items{\n",
+ " font-family:sans-serif;\n",
+ " font-size:10px;\n",
+ " }\n",
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+    "op = Optimus(master=\"local\", app_name=\"optimus\", verbose=True)"
+ ]
+ },
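
`Optimus(master=..., app_name=...)` bootstraps the underlying Spark session, and `verbose=True` produces the environment dump logged above. A hedged sketch of the equivalent builder calls, inferred from that log (the hive catalog setting appears in `PYSPARK_SUBMIT_ARGS`):

```python
from pyspark.sql import SparkSession

# A sketch of what Optimus configures underneath (an assumption, not its
# exact implementation).
spark = (SparkSession.builder
         .master("local")
         .appName("optimus")
         .enableHiveSupport()  # matches spark.sql.catalogImplementation=hive
         .getOrCreate())
```
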
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "AnalysisException",
+ "evalue": "'java.lang.RuntimeException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mPy4JJavaError\u001b[0m Traceback (most recent call last)",
+ "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\utils.py\u001b[0m in \u001b[0;36mdeco\u001b[1;34m(*a, **kw)\u001b[0m\n\u001b[0;32m 62\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 63\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0ma\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkw\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 64\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mpy4j\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mprotocol\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mPy4JJavaError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\py4j\\protocol.py\u001b[0m in \u001b[0;36mget_return_value\u001b[1;34m(answer, gateway_client, target_id, name)\u001b[0m\n\u001b[0;32m 327\u001b[0m \u001b[1;34m\"An error occurred while calling {0}{1}{2}.\\n\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 328\u001b[1;33m format(target_id, \".\", name), value)\n\u001b[0m\u001b[0;32m 329\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;31mPy4JJavaError\u001b[0m: An error occurred while calling o31.applySchemaToPythonRDD.\n: org.apache.spark.sql.AnalysisException: java.lang.RuntimeException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;\r\n\tat org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:106)\r\n\tat org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:194)\r\n\tat org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:114)\r\n\tat org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:102)\r\n\tat org.apache.spark.sql.hive.HiveSessionStateBuilder.externalCatalog(HiveSessionStateBuilder.scala:39)\r\n\tat org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog$lzycompute(HiveSessionStateBuilder.scala:54)\r\n\tat org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:52)\r\n\tat org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1.(HiveSessionStateBuilder.scala:69)\r\n\tat org.apache.spark.sql.hive.HiveSessionStateBuilder.analyzer(HiveSessionStateBuilder.scala:69)\r\n\tat org.apache.spark.sql.internal.BaseSessionStateBuilder$$anonfun$build$2.apply(BaseSessionStateBuilder.scala:293)\r\n\tat org.apache.spark.sql.internal.BaseSessionStateBuilder$$anonfun$build$2.apply(BaseSessionStateBuilder.scala:293)\r\n\tat org.apache.spark.sql.internal.SessionState.analyzer$lzycompute(SessionState.scala:79)\r\n\tat org.apache.spark.sql.internal.SessionState.analyzer(SessionState.scala:79)\r\n\tat org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:57)\r\n\tat org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:55)\r\n\tat org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:47)\r\n\tat org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:74)\r\n\tat org.apache.spark.sql.SparkSession.internalCreateDataFrame(SparkSession.scala:577)\r\n\tat org.apache.spark.sql.SparkSession.applySchemaToPythonRDD(SparkSession.scala:752)\r\n\tat org.apache.spark.sql.SparkSession.applySchemaToPythonRDD(SparkSession.scala:737)\r\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\r\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\r\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\r\n\tat java.lang.reflect.Method.invoke(Method.java:498)\r\n\tat py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)\r\n\tat py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)\r\n\tat py4j.Gateway.invoke(Gateway.java:282)\r\n\tat py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)\r\n\tat py4j.commands.CallCommand.execute(CallCommand.java:79)\r\n\tat py4j.GatewayConnection.run(GatewayConnection.java:238)\r\n\tat java.lang.Thread.run(Thread.java:748)\r\nCaused by: java.lang.RuntimeException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient\r\n\tat org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:522)\r\n\tat org.apache.spark.sql.hive.client.HiveClientImpl.newState(HiveClientImpl.scala:180)\r\n\tat org.apache.spark.sql.hive.client.HiveClientImpl.(HiveClientImpl.scala:114)\r\n\tat sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)\r\n\tat 
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)\r\n\tat sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)\r\n\tat java.lang.reflect.Constructor.newInstance(Constructor.java:423)\r\n\tat org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264)\r\n\tat org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:385)\r\n\tat org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:287)\r\n\tat org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:66)\r\n\tat org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:65)\r\n\tat org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:195)\r\n\tat org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:195)\r\n\tat org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:195)\r\n\tat org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)\r\n\t... 30 more\r\nCaused by: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient\r\n\tat org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1523)\r\n\tat org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.(RetryingMetaStoreClient.java:86)\r\n\tat org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)\r\n\tat org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)\r\n\tat org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)\r\n\tat org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)\r\n\tat org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)\r\n\t... 45 more\r\nCaused by: java.lang.reflect.InvocationTargetException\r\n\tat sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)\r\n\tat sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)\r\n\tat sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)\r\n\tat java.lang.reflect.Constructor.newInstance(Constructor.java:423)\r\n\tat org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)\r\n\t... 51 more\r\nCaused by: javax.jdo.JDOFatalDataStoreException: Unable to open a test connection to the given database. JDBC url = jdbc:derby:;databaseName=metastore_db;create=true, username = APP. Terminating connection pool (set lazyInit to true if you expect to start your database after your app). 
Original Exception: ------\r\njava.sql.SQLException: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@4cbe88e2, see the next exception for details.\r\n\tat org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)\r\n\tat org.apache.derby.impl.jdbc.SQLExceptionFactory.getSQLException(Unknown Source)\r\n\tat org.apache.derby.impl.jdbc.Util.seeNextException(Unknown Source)\r\n\tat org.apache.derby.impl.jdbc.EmbedConnection.bootDatabase(Unknown Source)\r\n\tat org.apache.derby.impl.jdbc.EmbedConnection.(Unknown Source)\r\n\tat org.apache.derby.jdbc.InternalDriver$1.run(Unknown Source)\r\n\tat org.apache.derby.jdbc.InternalDriver$1.run(Unknown Source)\r\n\tat java.security.AccessController.doPrivileged(Native Method)\r\n\tat org.apache.derby.jdbc.InternalDriver.getNewEmbedConnection(Unknown Source)\r\n\tat org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)\r\n\tat org.apache.derby.jdbc.InternalDriver.connect(Unknown Source)\r\n\tat org.apache.derby.jdbc.AutoloadedDriver.connect(Unknown Source)\r\n\tat java.sql.DriverManager.getConnection(DriverManager.java:664)\r\n\tat java.sql.DriverManager.getConnection(DriverManager.java:208)\r\n\tat com.jolbox.bonecp.BoneCP.obtainRawInternalConnection(BoneCP.java:361)\r\n\tat com.jolbox.bonecp.BoneCP.(BoneCP.java:416)\r\n\tat com.jolbox.bonecp.BoneCPDataSource.getConnection(BoneCPDataSource.java:120)\r\n\tat org.datanucleus.store.rdbms.ConnectionFactoryImpl$ManagedConnectionImpl.getConnection(ConnectionFactoryImpl.java:501)\r\n\tat org.datanucleus.store.rdbms.RDBMSStoreManager.(RDBMSStoreManager.java:298)\r\n\tat sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)\r\n\tat sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)\r\n\tat sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)\r\n\tat java.lang.reflect.Constructor.newInstance(Constructor.java:423)\r\n\tat org.datanucleus.plugin.NonManagedPluginRegistry.createExecutableExtension(NonManagedPluginRegistry.java:631)\r\n\tat org.datanucleus.plugin.PluginManager.createExecutableExtension(PluginManager.java:301)\r\n\tat org.datanucleus.NucleusContext.createStoreManagerForProperties(NucleusContext.java:1187)\r\n\tat org.datanucleus.NucleusContext.initialise(NucleusContext.java:356)\r\n\tat org.datanucleus.api.jdo.JDOPersistenceManagerFactory.freezeConfiguration(JDOPersistenceManagerFactory.java:775)\r\n\tat org.datanucleus.api.jdo.JDOPersistenceManagerFactory.createPersistenceManagerFactory(JDOPersistenceManagerFactory.java:333)\r\n\tat org.datanucleus.api.jdo.JDOPersistenceManagerFactory.getPersistenceManagerFactory(JDOPersistenceManagerFactory.java:202)\r\n\tat sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\r\n\tat sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\r\n\tat sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\r\n\tat java.lang.reflect.Method.invoke(Method.java:498)\r\n\tat javax.jdo.JDOHelper$16.run(JDOHelper.java:1965)\r\n\tat java.security.AccessController.doPrivileged(Native Method)\r\n\tat javax.jdo.JDOHelper.invoke(JDOHelper.java:1960)\r\n\tat javax.jdo.JDOHelper.invokeGetPersistenceManagerFactoryOnImplementation(JDOHelper.java:1166)\r\n\tat javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:808)\r\n\tat javax.jdo.JDOHelper.getPersistenceManagerFactory(JDOHelper.java:701)\r\n\tat 
org.apache.hadoop.hive.metastore.ObjectStore.getPMF(ObjectStore.java:365)\r\n\tat org.apache.hadoop.hive.metastore.ObjectStore.getPersistenceManager(ObjectStore.java:394)\r\n\tat org.apache.hadoop.hive.metastore.ObjectStore.initialize(ObjectStore.java:291)\r\n\tat org.apache.hadoop.hive.metastore.ObjectStore.setConf(ObjectStore.java:258)\r\n\tat org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:76)\r\n\tat org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)\r\n\tat org.apache.hadoop.hive.metastore.RawStoreProxy.(RawStoreProxy.java:57)\r\n\tat org.apache.hadoop.hive.metastore.RawStoreProxy.getProxy(RawStoreProxy.java:66)\r\n\tat org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.newRawStore(HiveMetaStore.java:593)\r\n\tat org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.getMS(HiveMetaStore.java:571)\r\n\tat org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.createDefaultDB(HiveMetaStore.java:624)\r\n\tat org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.init(HiveMetaStore.java:461)\r\n\tat org.apache.hadoop.hive.metastore.RetryingHMSHandler.(RetryingHMSHandler.java:66)\r\n\tat org.apache.hadoop.hive.metastore.RetryingHMSHandler.getProxy(RetryingHMSHandler.java:72)\r\n\tat org.apache.hadoop.hive.metastore.HiveMetaStore.newRetryingHMSHandler(HiveMetaStore.java:5762)\r\n\tat org.apache.hadoop.hive.metastore.HiveMetaStoreClient.(HiveMetaStoreClient.java:199)\r\n\tat org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.(SessionHiveMetaStoreClient.java:74)\r\n\tat sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)\r\n\tat sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)\r\n\tat sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)\r\n\tat java.lang.reflect.Constructor.newInstance(Constructor.java:423)\r\n\tat org.apache.hadoop.hive.metastore.MetaStoreUtils.newInstance(MetaStoreUtils.java:1521)\r\n\tat org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.(RetryingMetaStoreClient.java:86)\r\n\tat org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:132)\r\n\tat org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.getProxy(RetryingMetaStoreClient.java:104)\r\n\tat org.apache.hadoop.hive.ql.metadata.Hive.createMetaStoreClient(Hive.java:3005)\r\n\tat org.apache.hadoop.hive.ql.metadata.Hive.getMSC(Hive.java:3024)\r\n\tat org.apache.hadoop.hive.ql.session.SessionState.start(SessionState.java:503)\r\n\tat org.apache.spark.sql.hive.client.HiveClientImpl.newState(HiveClientImpl.scala:180)\r\n\tat org.apache.spark.sql.hive.client.HiveClientImpl.(HiveClientImpl.scala:114)\r\n\tat sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)\r\n\tat sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)\r\n\tat sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)\r\n\tat java.lang.reflect.Constructor.newInstance(Constructor.java:423)\r\n\tat org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:264)\r\n\tat org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:385)\r\n\tat org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:287)\r\n\tat org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:66)\r\n\tat 
org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:65)\r\n\t... [several hundred further frames, plus three verbatim repeats of the cause chain below, trimmed for readability] ...\r\nNestedThrowables:\njava.sql.SQLException: Unable to open a test connection to the given database. JDBC url = jdbc:derby:;databaseName=metastore_db;create=true, username = APP. Terminating connection pool (set lazyInit to true if you expect to start your database after your app).\r\nCaused by: ERROR XJ040: Failed to start database 'metastore_db' with class loader org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@4cbe88e2, see the next exception for details.\r\nCaused by: ERROR XSDB6: Another instance of Derby may have already booted the database C:\\Users\\argenisleon\\Documents\\Optimus\\examples\\metastore_db.\r\n\t... 110 more\r\n",
+ "\nDuring handling of the above exception, another exception occurred:\n",
+ "\u001b[1;31mAnalysisException\u001b[0m Traceback (most recent call last)",
+ "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 43\u001b[0m [11.0], date(2011, 4, 10), datetime(2014, 6, 24), True, bytearray(\"15\", \"utf-8\"), None)\n\u001b[0;32m 44\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 45\u001b[1;33m ], infer_schema = True).h_repartition(1)\n\u001b[0m\u001b[0;32m 46\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32m~\\Documents\\Optimus\\optimus\\functions.py\u001b[0m in \u001b[0;36mdata_frame\u001b[1;34m(cols, rows, infer_schema, pdf)\u001b[0m\n\u001b[0;32m 522\u001b[0m \u001b[0mstruct_fields\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;32mlambda\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mStructField\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mspecs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 523\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 524\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mSpark\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0minstance\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mspark\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcreateDataFrame\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrows\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mStructType\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstruct_fields\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 525\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 526\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\session.py\u001b[0m in \u001b[0;36mcreateDataFrame\u001b[1;34m(self, data, schema, samplingRatio, verifySchema)\u001b[0m\n\u001b[0;32m 691\u001b[0m \u001b[0mrdd\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mschema\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_createFromLocal\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mprepare\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mschema\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 692\u001b[0m \u001b[0mjrdd\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_jvm\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mSerDeUtil\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtoJavaArray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mrdd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_to_java_object_rdd\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 693\u001b[1;33m \u001b[0mjdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_jsparkSession\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapplySchemaToPythonRDD\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mjrdd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mrdd\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mschema\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mjson\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 694\u001b[0m \u001b[0mdf\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mjdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_wrapped\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 695\u001b[0m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_schema\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mschema\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\py4j\\java_gateway.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args)\u001b[0m\n\u001b[0;32m 1255\u001b[0m \u001b[0manswer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgateway_client\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msend_command\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcommand\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1256\u001b[0m return_value = get_return_value(\n\u001b[1;32m-> 1257\u001b[1;33m answer, self.gateway_client, self.target_id, self.name)\n\u001b[0m\u001b[0;32m 1258\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1259\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mtemp_arg\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mtemp_args\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;32m~\\Anaconda3\\lib\\site-packages\\pyspark\\sql\\utils.py\u001b[0m in \u001b[0;36mdeco\u001b[1;34m(*a, **kw)\u001b[0m\n\u001b[0;32m 67\u001b[0m e.java_exception.getStackTrace()))\n\u001b[0;32m 68\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0ms\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'org.apache.spark.sql.AnalysisException: '\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 69\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mAnalysisException\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ms\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m': '\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstackTrace\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 70\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0ms\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstartswith\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'org.apache.spark.sql.catalyst.analysis'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 71\u001b[0m \u001b[1;32mraise\u001b[0m \u001b[0mAnalysisException\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ms\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m': '\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstackTrace\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
+ "\u001b[1;31mAnalysisException\u001b[0m: 'java.lang.RuntimeException: java.lang.RuntimeException: Unable to instantiate org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient;'"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "from pyspark.sql.types import *\n",
+ "from datetime import date, datetime\n",
+ "\n",
+ "df = op.create.df(\n",
+ " [\n",
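+ " # each spec is (column name, dtype, nullable); bare entries like \"DateType\" and \"Tiemstamp\" below rely on infer_schema\n",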
+ " (\"names\", \"str\", True), \n",
+ " (\"height(ft)\",\"int\", True), \n",
+ " (\"function\", \"str\", True), \n",
+ " (\"rank\", \"int\", True), \n",
+ " (\"age\",\"int\",True),\n",
+ " (\"weight(t)\",\"float\",True),\n",
+ " (\"japanese name\", ArrayType(StringType()), True),\n",
+ " (\"last position seen\", \"str\", True),\n",
+ " (\"date arrival\", \"str\", True),\n",
+ " (\"last date seen\", \"str\", True),\n",
+ " (\"attributes\", ArrayType(FloatType()), True),\n",
+ " (\"DateType\"),\n",
+ " (\"Tiemstamp\"),\n",
+ " (\"Cybertronian\", \"bool\", True), \n",
+ " (\"function(binary)\",\"binary\", False),\n",
+ " (\"NullType\", \"null\", True),\n",
+ "\n",
+ " ],\n",
+ " [\n",
+ " (\"Optim'us\", 28, \"Leader\", 10, 5000000, 4.30, [\"Inochi\", \"Convoy\"], \"19.442735,-99.201111\", \"1980/04/10\",\n",
+ " \"2016/09/10\", [8.5344, 4300.0], date(2016, 9, 10), datetime(2014, 6, 24), True, bytearray(\"Leader\", \"utf-8\"),\n",
+ " None),\n",
+ " (\"bumbl#ebéé \", 17, \"Espionage\", 7, 5000000, 2.0, [\"Bumble\", \"Goldback\"], \"10.642707,-71.612534\", \"1980/04/10\",\n",
+ " \"2015/08/10\", [5.334, 2000.0], date(2015, 8, 10), datetime(2014, 6, 24), True, bytearray(\"Espionage\", \"utf-8\"),\n",
+ " None),\n",
+ " (\"ironhide&\", 26, \"Security\", 7, 5000000, 4.0, [\"Roadbuster\"], \"37.789563,-122.400356\", \"1980/04/10\",\n",
+ " \"2014/07/10\", [7.9248, 4000.0], date(2014, 6, 24), datetime(2014, 6, 24), True, bytearray(\"Security\", \"utf-8\"),\n",
+ " None),\n",
+ " (\"Jazz\", 13, \"First Lieutenant\", 8, 5000000, 1.80, [\"Meister\"], \"33.670666,-117.841553\", \"1980/04/10\",\n",
+ " \"2013/06/10\", [3.9624, 1800.0], date(2013, 6, 24), datetime(2014, 6, 24), True,\n",
+ " bytearray(\"First Lieutenant\", \"utf-8\"), None),\n",
+ " (\"Megatron\", None, \"None\", 10, 5000000, 5.70, [\"Megatron\"], None, \"1980/04/10\", \"2012/05/10\", [None, 5700.0],\n",
+ " date(2012, 5, 10), datetime(2014, 6, 24), True, bytearray(\"None\", \"utf-8\"), None),\n",
+ " (\"Metroplex_)^$\", 300, \"Battle Station\", 8, 5000000, None, [\"Metroflex\"], None, \"1980/04/10\", \"2011/04/10\",\n",
+ " [91.44, None], date(2011, 4, 10), datetime(2014, 6, 24), True, bytearray(\"Battle Station\", \"utf-8\"), None),\n",
+ " (\"1\", 2, \"3\", 4, 5, 6.0, [\"7\"], 8, \"1980/04/10\", \"2011/04/10\",\n",
+ " [11.0], date(2011, 4, 10), datetime(2014, 6, 24), True, bytearray(\"15\", \"utf-8\"), None)\n",
+ "\n",
+ " ], infer_schema = True).h_repartition(1)\n",
+ "df.table()"
+ ]
+ },
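+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Why this cell failed.** The `Py4JJavaError` above bottoms out in `ERROR XSDB6: Another instance of Derby may have already booted the database ... metastore_db`. With Hive support enabled, Spark keeps its metastore in an embedded Derby database inside the working directory, and Derby lets only one JVM open that database at a time. The usual trigger is a second notebook or Spark session running from the same folder, or a crashed session that left its lock files behind.\n",
+ "\n",
+ "A minimal clean-up sketch (not part of the original notebook; it assumes the `metastore_db` folder sits in the current working directory, as the trace shows). Stop every other Spark session first, then remove the stale Derby locks and re-run the cell:\n",
+ "\n",
+ "```python\n",
+ "import os\n",
+ "from pathlib import Path\n",
+ "\n",
+ "# Derby marks an open database with db.lck / dbex.lck lock files; deleting\n",
+ "# them is only safe once every other JVM using this metastore has stopped.\n",
+ "for lock in Path(\"metastore_db\").glob(\"*.lck\"):\n",
+ "    os.remove(lock)\n",
+ "```"
+ ]
+ },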
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "Viewing 6 of 6 rows / 16 columns
\n",
+ "1 partition(s)
\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " names \n",
+ " 1 (string) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " height(ft) \n",
+ " 2 (int) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " function \n",
+ " 3 (string) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " rank \n",
+ " 4 (int) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " age \n",
+ " 5 (int) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " weight(t) \n",
+ " 6 (float) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " japanese name \n",
+ " 7 (array<string>) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " last position seen \n",
+ " 8 (string) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " date arrival \n",
+ " 9 (string) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " last date seen \n",
+ " 10 (string) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " attributes \n",
+ " 11 (array<float>) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " DateType \n",
+ " 12 (date) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Tiemstamp \n",
+ " 13 (timestamp) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Cybertronian \n",
+ " 14 (boolean) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " function(binary) \n",
+ " 15 (binary) \n",
+ " \n",
+ " \n",
+ " not nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " NullType \n",
+ " 16 (null) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Optim'us\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 28\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Leader\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 10\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 5000000\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 4.300000190734863\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " ['Inochi',⋅'Convoy']\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 19.442735,-99.201111\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 1980/04/10\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2016/09/10\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " [8.53439998626709,⋅4300.0]\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2016-09-10\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2014-06-24⋅00:00:00\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " True\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " bumbl#ebéé⋅⋅\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 17\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Espionage\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 7\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 5000000\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2.0\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " ['Bumble',⋅'Goldback']\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 10.642707,-71.612534\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 1980/04/10\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2015/08/10\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " [5.334000110626221,⋅2000.0]\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2015-08-10\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2014-06-24⋅00:00:00\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " True\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " ironhide&\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 26\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Security\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 7\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 5000000\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 4.0\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " ['Roadbuster']\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 37.789563,-122.400356\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 1980/04/10\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2014/07/10\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " [7.924799919128418,⋅4000.0]\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2014-06-24\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2014-06-24⋅00:00:00\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " True\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Jazz\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 13\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " First⋅Lieutenant\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 8\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 5000000\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 1.7999999523162842\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " ['Meister']\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 33.670666,-117.841553\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 1980/04/10\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2013/06/10\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " [3.962399959564209,⋅1800.0]\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2013-06-24\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2014-06-24⋅00:00:00\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " True\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Megatron\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 10\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 5000000\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 5.699999809265137\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " ['Megatron']\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 1980/04/10\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2012/05/10\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " [None,⋅5700.0]\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2012-05-10\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2014-06-24⋅00:00:00\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " True\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Metroplex_)^$\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 300\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Battle⋅Station\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 8\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 5000000\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " ['Metroflex']\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 1980/04/10\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2011/04/10\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " [91.44000244140625,⋅None]\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2011-04-10\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2014-06-24⋅00:00:00\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " True\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ "
\n",
+ "\n",
"\n",
- " .info_items{\n",
- " font-family:sans-serif;\n",
- " font-size:10px;\n",
- " }\n",
- ""
+ "Viewing 6 of 6 rows / 16 columns
\n",
+ "1 partition(s)
\n"
],
"text/plain": [
""
@@ -155,12 +1046,12 @@
}
],
"source": [
- "op= Optimus(verbose=True)"
+ "df.rows.select_by_dtypes(\"names\",\"str\").table()"
]
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
@@ -430,8 +1321,8 @@
" \n",
" \n",
" \n",
- " bytearray(b'Leader')\n",
+ " None\n",
" \n",
" | \n",
" \n",
@@ -530,8 +1421,8 @@
" \n",
" \n",
" \n",
- " bytearray(b'Espionage')\n",
+ " None\n",
" \n",
" | \n",
" \n",
@@ -630,8 +1521,8 @@
" \n",
" \n",
" \n",
- " bytearray(b'Security')\n",
+ " None\n",
" \n",
" | \n",
" \n",
@@ -730,8 +1621,8 @@
" \n",
" \n",
" \n",
- " bytearray(b'First⋅Lieutenant')\n",
+ " None\n",
" \n",
" | \n",
" \n",
@@ -830,8 +1721,8 @@
" \n",
" \n",
" \n",
- " bytearray(b'None')\n",
+ " None\n",
" \n",
" | \n",
" \n",
@@ -930,8 +1821,8 @@
" \n",
" \n",
" \n",
- " bytearray(b'Battle⋅Station')\n",
+ " None\n",
" \n",
" | \n",
" \n",
@@ -959,55 +1850,47 @@
}
],
"source": [
- "import pandas as pd\n",
- "from pyspark.sql.types import *\n",
- "from datetime import date, datetime\n",
- "\n",
- "df = op.create.df(\n",
- " [\n",
- " (\"names\", \"str\", True), \n",
- " (\"height(ft)\",\"int\", True), \n",
- " (\"function\", \"str\", True), \n",
- " (\"rank\", \"int\", True), \n",
- " (\"age\",\"int\",True),\n",
- " (\"weight(t)\",\"float\",True),\n",
- " (\"japanese name\", ArrayType(StringType()), True),\n",
- " (\"last position seen\", \"str\", True),\n",
- " (\"date arrival\", \"str\", True),\n",
- " (\"last date seen\", \"str\", True),\n",
- " (\"attributes\", ArrayType(FloatType()), True),\n",
- " (\"DateType\"),\n",
- " (\"Tiemstamp\"),\n",
- " (\"Cybertronian\", \"bool\", True), \n",
- " (\"function(binary)\",\"binary\", False),\n",
- " (\"NullType\", \"null\", True),\n",
- "\n",
- " ],\n",
- " [\n",
- " (\"Optim'us\", 28, \"Leader\", 10, 5000000, 4.30, [\"Inochi\", \"Convoy\"], \"19.442735,-99.201111\", \"1980/04/10\",\n",
- " \"2016/09/10\", [8.5344, 4300.0], date(2016, 9, 10), datetime(2014, 6, 24), True, bytearray(\"Leader\", \"utf-8\"),\n",
- " None),\n",
- " (\"bumbl#ebéé \", 17, \"Espionage\", 7, 5000000, 2.0, [\"Bumble\", \"Goldback\"], \"10.642707,-71.612534\", \"1980/04/10\",\n",
- " \"2015/08/10\", [5.334, 2000.0], date(2015, 8, 10), datetime(2014, 6, 24), True, bytearray(\"Espionage\", \"utf-8\"),\n",
- " None),\n",
- " (\"ironhide&\", 26, \"Security\", 7, 5000000, 4.0, [\"Roadbuster\"], \"37.789563,-122.400356\", \"1980/04/10\",\n",
- " \"2014/07/10\", [7.9248, 4000.0], date(2014, 6, 24), datetime(2014, 6, 24), True, bytearray(\"Security\", \"utf-8\"),\n",
- " None),\n",
- " (\"Jazz\", 13, \"First Lieutenant\", 8, 5000000, 1.80, [\"Meister\"], \"33.670666,-117.841553\", \"1980/04/10\",\n",
- " \"2013/06/10\", [3.9624, 1800.0], date(2013, 6, 24), datetime(2014, 6, 24), True,\n",
- " bytearray(\"First Lieutenant\", \"utf-8\"), None),\n",
- " (\"Megatron\", None, \"None\", 10, 5000000, 5.70, [\"Megatron\"], None, \"1980/04/10\", \"2012/05/10\", [None, 5700.0],\n",
- " date(2012, 5, 10), datetime(2014, 6, 24), True, bytearray(\"None\", \"utf-8\"), None),\n",
- " (\"Metroplex_)^$\", 300, \"Battle Station\", 8, 5000000, None, [\"Metroflex\"], None, \"1980/04/10\", \"2011/04/10\",\n",
- " [91.44, None], date(2011, 4, 10), datetime(2014, 6, 24), True, bytearray(\"Battle Station\", \"utf-8\"), None),\n",
- "\n",
- " ], infer_schema = True).h_repartition(1)\n",
- "df.table()"
+ "df.rows.drop_by_dtypes(\"names\",\"int\").table()"
]
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'names': 'string',\n",
+ " 'height(ft)': 'int',\n",
+ " 'function': 'string',\n",
+ " 'rank': 'int',\n",
+ " 'age': 'int',\n",
+ " 'weight(t)': 'float',\n",
+ " 'japanese name': 'array',\n",
+ " 'last position seen': 'string',\n",
+ " 'date arrival': 'string',\n",
+ " 'last date seen': 'string',\n",
+ " 'attributes': 'array',\n",
+ " 'DateType': 'date',\n",
+ " 'Tiemstamp': 'timestamp',\n",
+ " 'Cybertronian': 'boolean',\n",
+ " 'function(binary)': 'binary',\n",
+ " 'NullType': 'null'}"
+ ]
+ },
+ "execution_count": 32,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.cols.dtypes()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
@@ -1277,8 +2160,8 @@
" \n",
" \n",
" \n",
- " bytearray(b'Leader')\n",
+ " None\n",
" \n",
" | \n",
" \n",
@@ -1377,8 +2260,8 @@
" \n",
" \n",
" \n",
- " bytearray(b'Espionage')\n",
+ " None\n",
" \n",
" | \n",
" \n",
@@ -1477,8 +2360,8 @@
" \n",
" \n",
" \n",
- " bytearray(b'Security')\n",
+ " None\n",
" \n",
" | \n",
" \n",
@@ -1577,8 +2460,8 @@
" \n",
" \n",
" \n",
- " bytearray(b'First⋅Lieutenant')\n",
+ " None\n",
" \n",
" | \n",
" \n",
@@ -1677,8 +2560,8 @@
" \n",
" \n",
" \n",
- " bytearray(b'None')\n",
+ " None\n",
" \n",
" | \n",
" \n",
@@ -1777,8 +2660,8 @@
" \n",
" \n",
" \n",
- " bytearray(b'Battle⋅Station')\n",
+ " None\n",
" \n",
" | \n",
" \n",
@@ -12742,8 +13625,37 @@
}
],
"source": [
- "!pip install altair\n",
- "!pip install -U altair vega_datasets notebook vega"
+ "import pandas as pd\n",
+ "from pyspark.sql.types import *\n",
+ "from datetime import date, datetime\n",
+ "\n",
+ "df = op.create.df(\n",
+ " [\n",
+ " (\"names\", \"str\", True), \n",
+ " (\"height(ft)\",\"int\", True), \n",
+ " (\"function\", \"str\", True), \n",
+ " (\"rank\", \"int\", True), \n",
+ " ],\n",
+ " [\n",
+ " (\"Optim'us\", 28, \"Leader\", 10, 5000000, 4.30, [\"Inochi\", \"Convoy\"], \"19.442735,-99.201111\", \"1980/04/10\",\n",
+ " \"2016/09/10\", [8.5344, 4300.0], date(2016, 9, 10), datetime(2014, 6, 24), True, bytearray(\"Leader\", \"utf-8\"),\n",
+ " None),\n",
+ " (\"bumbl#ebéé \", 17, \"Espionage\", 7, 5000000, 2.0, [\"Bumble\", \"Goldback\"], \"10.642707,-71.612534\", \"1980/04/10\",\n",
+ " \"2015/08/10\", [5.334, 2000.0], date(2015, 8, 10), datetime(2014, 6, 24), True, bytearray(\"Espionage\", \"utf-8\"),\n",
+ " None),\n",
+ " (\"ironhide&\", 26, \"Security\", 7, 5000000, 4.0, [\"Roadbuster\"], \"37.789563,-122.400356\", \"1980/04/10\",\n",
+ " \"2014/07/10\", [7.9248, 4000.0], date(2014, 6, 24), datetime(2014, 6, 24), True, bytearray(\"Security\", \"utf-8\"),\n",
+ " None),\n",
+ " (\"Jazz\", 13, \"First Lieutenant\", 8, 5000000, 1.80, [\"Meister\"], \"33.670666,-117.841553\", \"1980/04/10\",\n",
+ " \"2013/06/10\", [3.9624, 1800.0], date(2013, 6, 24), datetime(2014, 6, 24), True,\n",
+ " bytearray(\"First Lieutenant\", \"utf-8\"), None),\n",
+ " (\"Megatron\", None, \"None\", 10, 5000000, 5.70, [\"Megatron\"], None, \"1980/04/10\", \"2012/05/10\", [None, 5700.0],\n",
+ " date(2012, 5, 10), datetime(2014, 6, 24), True, bytearray(\"None\", \"utf-8\"), None),\n",
+ " (\"Metroplex_)^$\", 300, \"Battle Station\", 8, 5000000, None, [\"Metroflex\"], None, \"1980/04/10\", \"2011/04/10\",\n",
+ " [91.44, None], date(2011, 4, 10), datetime(2014, 6, 24), True, bytearray(\"Battle Station\", \"utf-8\"), None),\n",
+ "\n",
+ " ], infer_schema = True).h_repartition(1)\n",
+ "df.table()"
]
},
{
diff --git a/images/box.png b/images/box.png
index b0b919d09..9794681ae 100644
Binary files a/images/box.png and b/images/box.png differ
diff --git a/images/frequency.png b/images/frequency.png
index 5ca6595d7..704f7c31f 100644
Binary files a/images/frequency.png and b/images/frequency.png differ
diff --git a/images/hist.png b/images/hist.png
index 306604922..a749f9d06 100644
Binary files a/images/hist.png and b/images/hist.png differ
diff --git a/images/profiler.png b/images/profiler.png
index ed5088606..b4c283d9c 100644
Binary files a/images/profiler.png and b/images/profiler.png differ
diff --git a/images/profiler1.png b/images/profiler1.png
index d199f3807..9b25cc170 100644
Binary files a/images/profiler1.png and b/images/profiler1.png differ
diff --git a/images/profiler_numeric.png b/images/profiler_numeric.png
new file mode 100644
index 000000000..a69fb054f
Binary files /dev/null and b/images/profiler_numeric.png differ
diff --git a/images/scatter.png b/images/scatter.png
index 755ce48b6..d85cf9137 100644
Binary files a/images/scatter.png and b/images/scatter.png differ
diff --git a/images/table.png b/images/table.png
index 872fed015..b501d550b 100644
Binary files a/images/table.png and b/images/table.png differ
diff --git a/images/table1.png b/images/table1.png
index 872fed015..b501d550b 100644
Binary files a/images/table1.png and b/images/table1.png differ
diff --git a/images/table2.png b/images/table2.png
index 229b3b5a5..8e3259b52 100644
Binary files a/images/table2.png and b/images/table2.png differ
diff --git a/images/table3.png b/images/table3.png
index afd8d89ad..d3d955d22 100644
Binary files a/images/table3.png and b/images/table3.png differ
diff --git a/images/table4.png b/images/table4.png
index 10fd01dbd..bd5fb9440 100644
Binary files a/images/table4.png and b/images/table4.png differ
diff --git a/images/table5.png b/images/table5.png
index 1b6db93fc..3f13db92c 100644
Binary files a/images/table5.png and b/images/table5.png differ
diff --git a/images/table6.png b/images/table6.png
index 55f392bb4..aed92ed8b 100644
Binary files a/images/table6.png and b/images/table6.png differ
diff --git a/optimus/dataframe/columns.py b/optimus/dataframe/columns.py
index 2c3543f3d..746c065bc 100644
--- a/optimus/dataframe/columns.py
+++ b/optimus/dataframe/columns.py
@@ -37,6 +37,8 @@
from optimus.profiler.functions import bucketizer
from optimus.profiler.functions import create_buckets
+from optimus.functions import append as _append
+
def cols(self):
@add_attr(cols)
@@ -83,7 +85,7 @@ def append(cols_values=None):
if is_list_of_dataframes(cols_values):
dfs = cols_values
dfs.insert(0, self)
- df_result = append(dfs, like="columns")
+ df_result = _append(dfs, like="columns")
elif is_list_of_tuples(cols_values):
df_result = self
@@ -1670,12 +1672,15 @@ def dtypes(columns="*"):
return format_dict({col_name: data_types[col_name] for col_name in columns})
@add_attr(cols)
- def names(filter_by_column_dtypes=None):
+ def names(col_names="*", filter_by_column_dtypes=None, invert=False):
"""
- Get column names
+        Get column names
+        :param col_names: Column names to be processed; '*' for all, or a list of column names
+        :param filter_by_column_dtypes: Data type(s) used to filter the columns
+        :param invert: Invert the column selection
:return:
"""
- columns = parse_columns(self, "*", filter_by_column_dtypes=filter_by_column_dtypes)
+ columns = parse_columns(self, col_names, filter_by_column_dtypes=filter_by_column_dtypes, invert=invert)
return columns
@add_attr(cols)
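
As a quick illustration of the extended signature above, a minimal sketch, assuming `df` is the Transformers dataframe built in the notebook and that the dtype labels ("str", "int") match what filter_col_name_by_dtypes expects:

    # only string columns (the dtype label is an assumption)
    string_cols = df.cols.names(filter_by_column_dtypes="str")

    # every column that is NOT an int, via the new invert flag
    non_int_cols = df.cols.names(filter_by_column_dtypes="int", invert=True)

    # an explicit list of names is validated and passed through unchanged
    subset = df.cols.names(["names", "rank"])
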
diff --git a/optimus/dataframe/extension.py b/optimus/dataframe/extension.py
index 0518a1738..43561e32c 100644
--- a/optimus/dataframe/extension.py
+++ b/optimus/dataframe/extension.py
@@ -1,3 +1,4 @@
+import json
import os
import humanize
@@ -12,11 +13,11 @@
from pyspark.sql import functions as F
from pyspark.sql.types import *
-from optimus import RaiseIt
+from optimus.helpers.raiseit import RaiseIt
from optimus.helpers.checkit import is_str, is_column_a
from optimus.helpers.convert import val_to_list, one_list_to_val
from optimus.helpers.decorators import *
-from optimus.helpers.functions import parse_columns, collect_as_dict, random_int, traverse, print_html
+from optimus.helpers.functions import parse_columns, collect_as_dict, random_int, traverse, print_html, json_converter
from optimus.helpers.logger import logger
from optimus.profiler.templates.html import HEADER, FOOTER
from optimus.spark import Spark
@@ -38,7 +39,7 @@ def to_json(self):
:param self:
:return:
"""
- return collect_as_dict(self.collect())
+ return json.loads(json.dumps(collect_as_dict(self), ensure_ascii=False, default=json_converter))
@add_method(DataFrame)
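
A hedged sketch of what the reworked to_json() enables: the return value is now plain JSON-safe Python objects (dates and timestamps already serialized by json_converter), one dict per row in the dataframe's column order. `df` is assumed to be the notebook dataframe:

    import json

    payload = df.to_json()           # list of per-row dicts, column order preserved
    print(json.dumps(payload[0]))    # safe to re-dump; nothing non-serializable is left
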
diff --git a/optimus/helpers/functions.py b/optimus/helpers/functions.py
index 4a3f8284e..3528d91b1 100644
--- a/optimus/helpers/functions.py
+++ b/optimus/helpers/functions.py
@@ -1,3 +1,4 @@
+import collections
import datetime
import inspect
import json
@@ -30,7 +31,6 @@ def infer(value):
:return: Spark data type
"""
result = None
- # print(v)
if value is None:
result = "null"
@@ -120,10 +120,10 @@ def collect_as_list(df):
return df.rdd.flatMap(lambda x: x).collect()
-def collect_as_dict(value):
+def collect_as_dict(df):
"""
Return a dict from a Collect result
- :param value:
+ :param df:
:return:
"""
@@ -133,8 +133,15 @@ def collect_as_dict(value):
if len(dict_result) == 1:
dict_result = next(iter(dict_result.values()))
else:
- dict_result = [v.asDict() for v in value]
-
+ col_names = parse_columns(df, "*")
+
+        # asDict() can return the columns out of order, so rebuild each row following the dataframe's column order
+ for row in df.collect():
+ _row = row.asDict()
+ r = collections.OrderedDict()
+ for col in col_names:
+ r[col] = _row[col]
+ dict_result.append(r)
return dict_result
@@ -282,7 +289,6 @@ def escape_columns(columns):
escaped_columns = "`" + columns + "`"
else:
escaped_columns.append(columns)
- # print(escaped_columns)
return escaped_columns
@@ -306,7 +312,7 @@ def get_output_cols(input_cols, output_cols):
def parse_columns(df, cols_args, get_args=False, is_regex=None, filter_by_column_dtypes=None,
- accepts_missing_cols=False):
+ accepts_missing_cols=False, invert=False):
"""
Return a list of columns and check that columns exists in the dataframe
Accept '*' as parameter in which case return a list of all columns in the dataframe.
@@ -319,14 +325,13 @@ def parse_columns(df, cols_args, get_args=False, is_regex=None, filter_by_column
     :param is_regex: Use True if cols_args is a regex
     :param filter_by_column_dtypes: Data type(s) by which the column list is going to be filtered
     :param accepts_missing_cols: if True, do not check that the columns exist in the dataframe
+    :param invert: Invert the final selection, e.g. to select every column that is not an integer
+
:return: A list of columns string names
"""
attrs = None
- # ensure that cols_args is a list
- # cols_args = val_to_list(cols_args)
-
# if columns value is * get all dataframes columns
if is_regex is True:
r = re.compile(cols_args[0])
@@ -335,10 +340,6 @@ def parse_columns(df, cols_args, get_args=False, is_regex=None, filter_by_column
elif cols_args == "*" or cols_args is None:
cols = df.columns
-
- # Return filtered columns
- # columns_filtered = list(set(columns) - set(columns_filtered))
-
# In case we have a list of tuples we use the first element of the tuple is taken as the column name
# and the rest as params. We can use the param in a custom function as follow
# def func(attrs): attrs return (1,2) and (3,4)
@@ -362,13 +363,15 @@ def parse_columns(df, cols_args, get_args=False, is_regex=None, filter_by_column
check_for_missing_columns(df, cols)
# Filter by column data type
- filter_by_column_dtypes = val_to_list(filter_by_column_dtypes)
+ if filter_by_column_dtypes is not None:
+ filter_by_column_dtypes = val_to_list(filter_by_column_dtypes)
columns_residual = None
# If necessary filter the columns by data type
- if is_list_of_strings(filter_by_column_dtypes):
+ if filter_by_column_dtypes:
# Get columns for every data type
+
columns_filtered = filter_col_name_by_dtypes(df, filter_by_column_dtypes)
# Intersect the columns filtered per data type from the whole dataframe with the columns passed to the function
@@ -378,10 +381,13 @@ def parse_columns(df, cols_args, get_args=False, is_regex=None, filter_by_column
columns_residual = list(OrderedSet(cols) - OrderedSet(columns_filtered))
else:
final_columns = cols
- # final_columns = escape_columns(final_columns)
+
# Return cols or cols an params
cols_params = []
+ if invert:
+ final_columns = list(OrderedSet(cols) - OrderedSet(final_columns))
+
if get_args is True:
cols_params = final_columns, attrs
elif get_args is False:
@@ -389,9 +395,9 @@ def parse_columns(df, cols_args, get_args=False, is_regex=None, filter_by_column
else:
RaiseIt.value_error(get_args, ["True", "False"])
- if columns_residual:
- print(",".join(escape_columns(columns_residual)), "column(s) was not processed because is/are not",
- ",".join(filter_by_column_dtypes))
+ # if columns_residual:
+ # print(",".join(escape_columns(columns_residual)), "column(s) was not processed because is/are not",
+ # ",".join(filter_by_column_dtypes))
return cols_params
@@ -533,7 +539,7 @@ def json_enconding(obj):
:param obj:
:return:
"""
- return json.dumps(obj, sort_keys=True, default=json_converter)
+ return json.dumps(obj, default=json_converter)
def debug(value):
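
A small sketch of the new invert flag working together with the dtype filter; the dtype labels are assumptions and `df` is the notebook dataframe:

    from optimus.helpers.functions import parse_columns

    # columns whose dtype matches the filter
    numeric = parse_columns(df, "*", filter_by_column_dtypes=["int", "float"])

    # the complement of the same selection, thanks to invert=True
    not_numeric = parse_columns(df, "*", filter_by_column_dtypes=["int", "float"], invert=True)
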
diff --git a/optimus/io/jdbc.py b/optimus/io/jdbc.py
index 91ba83aa1..dea9e0a45 100644
--- a/optimus/io/jdbc.py
+++ b/optimus/io/jdbc.py
@@ -130,7 +130,7 @@ def table_to_df(self, table_name, columns="*", limit=None):
def execute(self, query, limit=None):
"""
Execute a SQL query
- :param limit: default limit the whole query. We play defensive here in case the result is a big chunck of data
+ :param limit: default limit the whole query. We play defensive here in case the result is a big chunk of data
:param query: SQL query string
:return:
"""
@@ -146,6 +146,23 @@ def execute(self, query, limit=None):
.option("password", self.password) \
.load()
+ def df_to_table(self, df, table, mode="overwrite"):
+ """
+ Send a dataframe to the database
+ :param df:
+ :param table:
+        :param mode: write mode, "overwrite" by default
+ :return:
+ """
+ return df.write \
+ .format("jdbc") \
+ .mode(mode) \
+ .option("url", self.url) \
+ .option("dbtable", table) \
+ .option("user", self.user) \
+ .option("password", self.password) \
+ .save()
+
@staticmethod
def _limit(limit=None):
"""
@@ -167,7 +184,7 @@ class Table:
def __init__(self, db):
self.db = db
- def show(self, table_names="*", limit="all"):
+ def show(self, table_names="*", limit=None):
db = self.db
if table_names is "*":
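
A minimal round-trip sketch for the new writer, assuming `db` is an already-constructed optimus.io.jdbc connection (its constructor is not shown in this diff) and `df` is the notebook dataframe:

    db.df_to_table(df, table="transformers")                  # mode="overwrite" by default
    db.df_to_table(df, table="transformers", mode="append")   # or append to the existing table
    df_back = db.table_to_df("transformers")                  # read it back
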
diff --git a/optimus/io/load.py b/optimus/io/load.py
index 181de57ae..c0b4a3147 100644
--- a/optimus/io/load.py
+++ b/optimus/io/load.py
@@ -59,10 +59,23 @@ def json(path, multiline=False, *args, **kwargs):
raise
return replace_columns_special_characters(df)
+ @staticmethod
+ def tsv(path, header='true', infer_schema='true', *args, **kwargs):
+ """
+ Return a dataframe from a tsv file.
+ :param path: path or location of the file.
+        :param header: whether the dataset has a header row; 'true' by default.
+        :param infer_schema: infer the input schema automatically from the data.
+        This requires one extra pass over the data; 'true' by default.
+
+ :return:
+ """
+ return Load.csv(path, sep='\t', header=header, infer_schema=infer_schema, *args, **kwargs)
+
@staticmethod
def csv(path, sep=',', header='true', infer_schema='true', *args, **kwargs):
"""
- Return a dataframe from a csv file.. It is the same read.csv Spark function with some predefined
+ Return a dataframe from a csv file. It is the same read.csv Spark function with some predefined
params
:param path: path or location of the file.
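
Since Load.tsv simply delegates to Load.csv with sep='\t', usage mirrors the csv loader; the path below is a placeholder, and op.load is assumed to expose these loaders as in the notebook:

    df_tsv = op.load.tsv("data/example.tsv")                      # header and schema inference on by default
    df_raw = op.load.tsv("data/example.tsv", header='false')      # extra kwargs flow through to csv()
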
diff --git a/optimus/jars/RedshiftJDBC42-1.2.16.1027.jar b/optimus/jars/RedshiftJDBC42-1.2.16.1027.jar
new file mode 100644
index 000000000..007c39299
Binary files /dev/null and b/optimus/jars/RedshiftJDBC42-1.2.16.1027.jar differ
diff --git a/optimus/jars/mysql-connector-java-8.0.16.jar b/optimus/jars/mysql-connector-java-8.0.16.jar
new file mode 100644
index 000000000..acd5ea69a
Binary files /dev/null and b/optimus/jars/mysql-connector-java-8.0.16.jar differ
diff --git a/optimus/jars/ojdbc7.jar b/optimus/jars/ojdbc7.jar
new file mode 100644
index 000000000..fd38a6c4a
Binary files /dev/null and b/optimus/jars/ojdbc7.jar differ
diff --git a/optimus/jars/postgresql-42.2.5.jar b/optimus/jars/postgresql-42.2.5.jar
new file mode 100644
index 000000000..d89d4331a
Binary files /dev/null and b/optimus/jars/postgresql-42.2.5.jar differ
diff --git a/optimus/ml/feature.py b/optimus/ml/feature.py
index 919dfb527..58e5ab858 100644
--- a/optimus/ml/feature.py
+++ b/optimus/ml/feature.py
@@ -1,10 +1,9 @@
from pyspark.ml import feature, Pipeline
from pyspark.ml.feature import StringIndexer, IndexToString, OneHotEncoder, VectorAssembler, Normalizer
-from pyspark.ml.linalg import DenseVector, VectorUDT
-from pyspark.sql import functions as F
-from optimus.helpers.checkit import is_dataframe, is_
+from optimus.helpers.checkit import is_dataframe, is_, is_str
from optimus.helpers.functions import parse_columns
+from optimus.helpers.raiseit import RaiseIt
def n_gram(df, input_col, n=2):
@@ -29,7 +28,7 @@ def n_gram(df, input_col, n=2):
return df_model, tfidf_model
-def string_to_index(df, input_cols):
+def string_to_index(df, input_cols, **kargs):
"""
Maps a string column of labels to an ML column of label indices. If the input column is
numeric, we cast it to string and index the string values.
@@ -40,7 +39,7 @@ def string_to_index(df, input_cols):
input_cols = parse_columns(df, input_cols)
- indexers = [StringIndexer(inputCol=column, outputCol=column + "_index").fit(df) for column in
+ indexers = [StringIndexer(inputCol=column, outputCol=column + "_index", **kargs).fit(df) for column in
list(set(input_cols))]
pipeline = Pipeline(stages=indexers)
@@ -49,7 +48,7 @@ def string_to_index(df, input_cols):
return df
-def index_to_string(df, input_cols):
+def index_to_string(df, input_cols, **kargs):
"""
Maps a column of indices back to a new column of corresponding string values. The index-string mapping is
either from the ML attributes of the input column, or from user-supplied labels (which take precedence over
@@ -61,7 +60,7 @@ def index_to_string(df, input_cols):
input_cols = parse_columns(df, input_cols)
- indexers = [IndexToString(inputCol=column, outputCol=column + "_string") for column in
+ indexers = [IndexToString(inputCol=column, outputCol=column + "_string", **kargs) for column in
list(set(input_cols))]
pipeline = Pipeline(stages=indexers)
@@ -70,7 +69,7 @@ def index_to_string(df, input_cols):
return df
-def one_hot_encoder(df, input_cols):
+def one_hot_encoder(df, input_cols, **kargs):
"""
Maps a column of label indices to a column of binary vectors, with at most a single one-value.
:param df: Dataframe to be transformed
@@ -80,7 +79,7 @@ def one_hot_encoder(df, input_cols):
input_cols = parse_columns(df, input_cols)
- encode = [OneHotEncoder(inputCol=column, outputCol=column + "_encoded") for column in
+ encode = [OneHotEncoder(inputCol=column, outputCol=column + "_encoded", **kargs) for column in
list(set(input_cols))]
pipeline = Pipeline(stages=encode)
@@ -119,31 +118,16 @@ def normalizer(df, input_cols, p=2.0):
"""
     # Check that the columns argument is a string or a list:
+    if not is_(input_cols, [str, list]):
+        RaiseIt.type_error(input_cols, [str, list])
- assert isinstance(input_cols, (str, list)), \
- "Error: %s argument must be a string or a list." % "input_cols"
-
- if isinstance(input_cols, str):
+ if is_str(input_cols):
input_cols = [input_cols]
- assert isinstance(p, (float, int)), "Error: p argument must be a numeric value."
-
- # Convert ArrayType() column to DenseVector
- def arr_to_vec(arr_column):
- """
- :param arr_column: Column name
- :return: Returns DenseVector by converting an ArrayType() column
- """
- return DenseVector(arr_column)
-
- # User-Defined function
- # TODO: use apply() to use Pyarrow
- udf_arr_to_vec = F.udf(arr_to_vec, VectorUDT())
+    if not is_(p, [float, int]):
+        RaiseIt.type_error(p, [float, int])
- # Check for columns which are not DenseVector types and convert them into DenseVector
- for col in input_cols:
- if not is_(df[col], DenseVector):
- df = df.withColumn(col, udf_arr_to_vec(df[col]))
+ df = df.cols.cast(input_cols, "vector")
normal = [Normalizer(inputCol=column, outputCol=column + "_normalized", p=p) for column in
list(set(input_cols))]
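
A sketch of the new **kargs pass-through in string_to_index and friends; handleInvalid is a genuine pyspark StringIndexer parameter ('keep' needs Spark >= 2.2), and `df` with its "names" string column is the notebook dataframe:

    from optimus.ml.feature import string_to_index

    # keyword arguments are forwarded verbatim to pyspark.ml.feature.StringIndexer
    df_indexed = string_to_index(df, "names", handleInvalid="keep")   # adds a "names_index" column
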
diff --git a/optimus/ml/models.py b/optimus/ml/models.py
index 774ba1d05..a5ccac2a7 100644
--- a/optimus/ml/models.py
+++ b/optimus/ml/models.py
@@ -1,19 +1,13 @@
from pyspark.ml import feature, classification
from pyspark.ml.classification import RandomForestClassifier, DecisionTreeClassifier, GBTClassifier
from pyspark.sql.functions import *
-from pyspark.sql.session import SparkSession
from pysparkling import *
from pysparkling.ml import H2OAutoML, H2ODeepLearning, H2OXGBoost, H2OGBM
-from optimus.helpers.checkit import is_dataframe
+from optimus.helpers.checkit import is_dataframe, is_str
from optimus.helpers.functions import parse_columns
from optimus.ml.feature import string_to_index, vector_assembler
-
-# from optimus.spark import Spark
-# spark = Spark.instance.spark()
-# TODO: not use getOrCreate use the singleton in optimus.spark
-
-spark = SparkSession.builder.getOrCreate()
+from optimus.spark import Spark
class ML:
@@ -77,7 +71,8 @@ def decision_tree(df, columns, input_col, **kargs):
columns = parse_columns(df, columns)
- assert isinstance(input_col, str), "Error, input column must be a string"
+ if not is_str(input_col):
+ raise TypeError("Error, input column must be a string")
data = df.select(columns)
feats = data.columns
@@ -109,7 +104,8 @@ def gbt(df, columns, input_col, **kargs):
columns = parse_columns(df, columns)
- assert isinstance(input_col, str), "Error, input column must be a string"
+ if not is_str(input_col):
+ raise TypeError("Error, input column must be a string")
data = df.select(columns)
feats = data.columns
@@ -129,7 +125,7 @@ def gbt(df, columns, input_col, **kargs):
@staticmethod
def h2o_automl(df, label, columns, **kargs):
- H2OContext.getOrCreate(spark)
+ H2OContext.getOrCreate(Spark.instance.spark)
df_sti = string_to_index(df, input_cols=label)
df_va = vector_assembler(df_sti, input_cols=columns)
@@ -137,7 +133,7 @@ def h2o_automl(df, label, columns, **kargs):
maxRuntimeSecs=60, # 1 minutes
seed=1,
maxModels=3,
- predictionCol=label + "_index",
+ labelCol=label + "_index",
**kargs)
model = automl.fit(df_va)
@@ -150,7 +146,7 @@ def h2o_automl(df, label, columns, **kargs):
@staticmethod
def h2o_deeplearning(df, label, columns, **kargs):
- H2OContext.getOrCreate(spark)
+ H2OContext.getOrCreate(Spark.instance.spark)
df_sti = string_to_index(df, input_cols=label)
df_va = vector_assembler(df_sti, input_cols=columns)
@@ -160,7 +156,7 @@ def h2o_deeplearning(df, label, columns, **kargs):
l2=0.0,
hidden=[200, 200],
featuresCols=columns,
- predictionCol=label,
+ labelCol=label,
**kargs)
model = h2o_deeplearning.fit(df_va)
df_raw = model.transform(df_va)
@@ -172,13 +168,13 @@ def h2o_deeplearning(df, label, columns, **kargs):
@staticmethod
def h2o_xgboost(df, label, columns, **kargs):
- H2OContext.getOrCreate(spark)
+ H2OContext.getOrCreate(Spark.instance.spark)
df_sti = string_to_index(df, input_cols=label)
df_va = vector_assembler(df_sti, input_cols=columns)
h2o_xgboost = H2OXGBoost(convertUnknownCategoricalLevelsToNa=True,
featuresCols=columns,
- predictionCol=label,
+ labelCol=label,
**kargs)
model = h2o_xgboost.fit(df_va)
df_raw = model.transform(df_va)
@@ -190,14 +186,14 @@ def h2o_xgboost(df, label, columns, **kargs):
@staticmethod
def h2o_gbm(df, label, columns, **kargs):
- H2OContext.getOrCreate(spark)
+ H2OContext.getOrCreate(Spark.instance.spark)
df_sti = string_to_index(df, input_cols=label)
df_va = vector_assembler(df_sti, input_cols=columns)
h2o_gbm = H2OGBM(ratio=0.8,
seed=1,
featuresCols=columns,
- predictionCol=label,
+ labelCol=label,
**kargs)
model = h2o_gbm.fit(df_va)
df_raw = model.transform(df_va)
diff --git a/optimus/optimus.py b/optimus/optimus.py
index 34f471a90..98b87f12b 100644
--- a/optimus/optimus.py
+++ b/optimus/optimus.py
@@ -1,12 +1,14 @@
import os
import sys
+from pathlib import Path
from shutil import rmtree
-from deepdiff import DeepDiff # For Deep Difference of 2 objects
+from deepdiff import DeepDiff
from pyspark.sql import DataFrame
from optimus.enricher import Enricher
from optimus.functions import append, Create
+from optimus.helpers.checkit import is_list
from optimus.helpers.constants import *
from optimus.helpers.convert import val_to_list
from optimus.helpers.functions import print_html, print_json
@@ -31,6 +33,7 @@ def __init__(self, session=None, master="local[*]", app_name="optimus", checkpoi
repositories=None,
packages=None,
jars=None,
+ driver_class_path=None,
options=None,
additional_options=None,
queue_url=None,
@@ -84,10 +87,15 @@ def __init__(self, session=None, master="local[*]", app_name="optimus", checkpoi
self.packages = packages
self.repositories = repositories
- if jars is None:
- jars = {}
-
+ # Jars
self.jars = jars
+ self._add_jars(jars)
+
+        # Driver Class Path
+ self.driver_class_path = driver_class_path
+ self._add_driver_class_path(driver_class_path)
+
+ # Additional Options
self.additional_options = additional_options
self.verbose(verbose)
@@ -96,7 +104,20 @@ def __init__(self, session=None, master="local[*]", app_name="optimus", checkpoi
# TODO:
         # if Spark 2.4 is going to be used this is not necessary.
         # Maybe we can check a priori which version of Spark is going to be used
- # self._add_spark_packages(["com.databricks:spark-avro_2.11:4.0.0"])
+ self._add_spark_packages(["com.databricks:spark-avro_2.11:4.0.0"])
+
+ def c(files):
+ return [Path(path + file).as_posix() for file in files]
+
+ path = os.path.dirname(os.path.abspath(__file__))
+
+        # Add database jars
+ self._add_jars(["../jars/RedshiftJDBC42-1.2.16.1027.jar", "../jars/mysql-connector-java-8.0.16.jar",
+ "../jars/ojdbc7.jar", "../jars/postgresql-42.2.5.jar"])
+
+ self._add_driver_class_path(
+ c(["//jars//RedshiftJDBC42-1.2.16.1027.jar", "//jars//mysql-connector-java-8.0.16.jar",
+ "//jars//ojdbc7.jar", "//jars//postgresql-42.2.5.jar"]))
self._start_session()
@@ -299,20 +320,22 @@ def append(dfs, like):
"""
return append(dfs, like)
+ def _setup_repositories(self):
+ if self.repositories:
+ return '--repositories {}'.format(','.join(self.repositories))
+ else:
+ return ''
+
+ # Spark Package
def _add_spark_packages(self, packages):
"""
Define the Spark packages that must be loaded at start time
:param packages:
:return:
"""
- for p in packages:
- self.packages.append(p)
- def _setup_repositories(self):
- if self.repositories:
- return '--repositories {}'.format(','.join(self.repositories))
- else:
- return ''
+ for p in val_to_list(packages):
+ self.packages.append(p)
def _setup_packages(self):
if self.packages:
@@ -320,12 +343,37 @@ def _setup_packages(self):
else:
return ''
+ # Jar
+ def _add_jars(self, jar):
+ if self.jars is None:
+ self.jars = []
+
+ if is_list(jar):
+ for j in val_to_list(jar):
+ self.jars.append(j)
+
def _setup_jars(self):
if self.jars:
return '--jars {}'.format(','.join(self.jars))
else:
return ''
+ # Driver class path
+ def _add_driver_class_path(self, driver_class_path):
+ if self.driver_class_path is None:
+ self.driver_class_path = []
+
+ if is_list(driver_class_path):
+ for d in val_to_list(driver_class_path):
+ self.driver_class_path.append(d)
+
+ def _setup_driver_class_path(self):
+ if self.driver_class_path:
+ return '--driver-class-path {}'.format(';'.join(self.driver_class_path))
+ else:
+ return ''
+
+ # Options
def _setup_options(self, additional_options):
options = {}
@@ -359,6 +407,7 @@ def _start_session(self):
self._setup_repositories(),
self._setup_packages(),
self._setup_jars(),
+ self._setup_driver_class_path(),
self._setup_options(self.additional_options),
'pyspark-shell',
]
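
A hedged sketch of the new constructor knobs; the jar path is a placeholder, and per _setup_driver_class_path the entries end up joined with ';' into a single --driver-class-path flag:

    from optimus import Optimus

    op = Optimus(jars=["jars/my-jdbc-driver.jar"],
                 driver_class_path=["jars/my-jdbc-driver.jar"])
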
diff --git a/optimus/profiler/functions.py b/optimus/profiler/functions.py
index 4885098a3..ab46db83d 100644
--- a/optimus/profiler/functions.py
+++ b/optimus/profiler/functions.py
@@ -42,7 +42,7 @@ def write_json(data, path):
"""
try:
with open(path, 'w', encoding='utf-8') as outfile:
- json.dump(data, outfile, sort_keys=True, indent=4, ensure_ascii=False, default=json_converter)
+ json.dump(data, outfile, indent=4, ensure_ascii=False, default=json_converter)
except IOError:
pass
diff --git a/optimus/spark.py b/optimus/spark.py
index 0f3f3e350..6be46bc06 100644
--- a/optimus/spark.py
+++ b/optimus/spark.py
@@ -31,17 +31,12 @@ def create(self, master="local[*]", app_name="optimus"):
logger.print("-----")
logger.print(STARTING_SPARK)
- # print(os.environ['PYSPARK_SUBMIT_ARGS'])
-
# Build the spark session
self._spark = SparkSession.builder \
.appName(app_name) \
.master(master) \
- .config("spark.executor.heartbeatInterval", "110") \
- .config("spark.jars.packages", "ml.combust.mleap:mleap-spark_2.11:0.13.0") \
.getOrCreate()
- # .option("driver", "org.postgresql.Driver")
self._sc = self._spark.sparkContext
logger.print("Spark Version:" + self._sc.version)
diff --git a/optimus/version.py b/optimus/version.py
index a40d50a0e..342c856eb 100644
--- a/optimus/version.py
+++ b/optimus/version.py
@@ -5,5 +5,5 @@ def _safe_int(string):
return string
-__version__ = '2.2.51'
+__version__ = '2.2.6'
VERSION = tuple(_safe_int(x) for x in __version__.split('.'))
diff --git a/readme_.ipynb b/readme_.ipynb
index 957b34fc1..c305f71ee 100644
--- a/readme_.ipynb
+++ b/readme_.ipynb
@@ -82,7 +82,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
@@ -191,7 +191,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
@@ -302,7 +302,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -336,7 +336,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -390,7 +390,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 8,
"metadata": {
"lines_to_next_cell": 2
},
@@ -400,7 +400,14 @@
"output_type": "stream",
"text": [
"Loading page (1/2)\n",
+ "[> ] 0%\r",
+ "[======> ] 10%\r",
+ "[==============================> ] 50%\r",
+ "[============================================================] 100%\r",
"Rendering (2/2) \n",
+ "[> ] 0%\r",
+ "[===============> ] 25%\r",
+ "[============================================================] 100%\r",
"Done \n"
]
},
@@ -435,17 +442,9 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 9,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "`japanese name`,`attributes`,`tiemstamp`,`nulltype` column(s) was not processed because is/are not byte,short,big,int,double,float,string,date,bool\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# This is a custom function\n",
"def func(value, arg):\n",
@@ -480,7 +479,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 10,
"metadata": {},
"outputs": [
{
@@ -488,7 +487,14 @@
"output_type": "stream",
"text": [
"Loading page (1/2)\n",
+ "[> ] 0%\r",
+ "[======> ] 10%\r",
+ "[==============================> ] 50%\r",
+ "[============================================================] 100%\r",
"Rendering (2/2) \n",
+ "[> ] 0%\r",
+ "[===============> ] 25%\r",
+ "[============================================================] 100%\r",
"Done \n"
]
},
@@ -506,120 +512,1454 @@
}
],
"source": [
- "df.table_image(\"images/table1.png\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Into this"
+ "df.table_image(\"images/table1.png\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Into this"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Loading page (1/2)\n",
+ "[> ] 0%\r",
+ "[======> ] 10%\r",
+ "[==============================> ] 50%\r",
+ "[============================================================] 100%\r",
+ "Rendering (2/2) \n",
+ "[> ] 0%\r",
+ "[===============> ] 25%\r",
+ "[============================================================] 100%\r",
+ "Done \n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "new_df.table_image(\"images/table2.png\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "lines_to_next_cell": 0
+ },
+ "source": [
+    "Note that you can use Optimus functions, Spark functions (`.withColumn()`) and all the DataFrame functions available in Spark at the same time. To learn about all the Optimus functionality please go to these [notebooks](examples/)\n",
+    "\n",
+    "### Custom functions\n",
+    "Spark has multiple ways to transform your data: RDDs, column expressions, UDFs and pandas UDFs. In Optimus we created `apply()` and `apply_expr()`, which handle all the implementation complexity.\n",
+    "\n",
+    "Here you apply a function to the \"height(ft)\" column, adding 1 and 2 to the current column value, all powered by pandas UDFs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Loading page (1/2)\n",
+ "[> ] 0%\r",
+ "[======> ] 10%\r",
+ "[==============================> ] 50%\r",
+ "[============================================================] 100%\r",
+ "Rendering (2/2) \n",
+ "[> ] 0%\r",
+ "[===============> ] 25%\r",
+ "[============================================================] 100%\r",
+ "Done \n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "def func(value, args):\n",
+ " return value + args[0] + args[1]\n",
+ "\n",
+ "df.cols.apply(\"height(ft)\",func,\"int\", [1,2]).table_image(\"images/table3.png\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "If you want to apply a column expression, use `apply_expr()` like this. In this case we pass 20 as an argument to divide the current column value"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Loading page (1/2)\n",
+ "[> ] 0%\r",
+ "[======> ] 10%\r",
+ "[==============================> ] 50%\r",
+ "[============================================================] 100%\r",
+ "Rendering (2/2) \n",
+ "[> ] 0%\r",
+ "[===============> ] 25%\r",
+ "[============================================================] 100%\r",
+ "Done \n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ ""
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from pyspark.sql import functions as F\n",
+ "\n",
+ "def func(col_name, args):\n",
+ " return F.col(col_name)/20\n",
+ "\n",
+ "df.cols.apply(\"height(ft)\", func=func, args=20).table_image(\"images/table4.png\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "You can change the table output back to ascii if you wish"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "op.output(\"ascii\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "To return to HTML just:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "op.output(\"html\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Data profiling\n",
+ "\n",
+    "Optimus comes with a powerful and unique data profiler. Besides basic and advanced stats like min, max, kurtosis and MAD, \n",
+    "it also lets you know what type of data every column has. For example, if a string column holds strings, integers, floats, bools and dates, Optimus can give you a unique overview of your data. \n",
+ "Just run `df.profile(\"*\")` to profile all the columns. For more info about the profiler please go to this [notebook](examples/new-api-profiler.ipynb).\n",
+ "\n",
+ "Let's load a \"big\" dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df = op.load.csv(\"https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/Meteorite_Landings.csv\").h_repartition()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Numeric"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Including 'nan' as Null in processing 'name'\n",
+ "Including 'nan' as Null in processing 'nametype'\n",
+ "Including 'nan' as Null in processing 'recclass'\n",
+ "Including 'nan' as Null in processing 'fall'\n",
+ "Including 'nan' as Null in processing 'year'\n",
+ "Including 'nan' as Null in processing 'GeoLocation'\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
Overview
\n",
+ "\n",
+ "\n",
+ "
\n",
+ "
Dataset info
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " Number of columns | \n",
+ " 10 | \n",
+ "\n",
+ "
\n",
+ " \n",
+ " Number of rows | \n",
+ " 45716 | \n",
+ "\n",
+ "
\n",
+ " \n",
+ " Total Missing (%) | \n",
+ " 0.49% | \n",
+ "\n",
+ "
\n",
+ " \n",
+ " Total size in memory | \n",
+ " -1 Bytes | \n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
Column types
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " String | \n",
+ " 0 | \n",
+ "\n",
+ "
\n",
+ " \n",
+ " Numeric | \n",
+ " 1 | \n",
+ "\n",
+ "
\n",
+ " \n",
+ " Date | \n",
+ " 0 | \n",
+ "\n",
+ "
\n",
+ " \n",
+ " Bool | \n",
+ " 0 | \n",
+ "\n",
+ "
\n",
+ " \n",
+ " Array | \n",
+ " 0 | \n",
+ "\n",
+ "
\n",
+ " \n",
+ " Not available | \n",
+ " 0 | \n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "
\n",
+ "\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
mass (g)
\n",
+ " numeric\n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " Unique | \n",
+ " 12497 | \n",
+ "
\n",
+ " \n",
+ " Unique (%) | \n",
+ " 27.336 | \n",
+ "
\n",
+ " \n",
+ " Missing | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " Missing (%) | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ " Datatypes\n",
+ "
\n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " String\n",
+ " | \n",
+ " \n",
+ " 0\n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " Integer\n",
+ " | \n",
+ " \n",
+ " 0\n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " Float\n",
+ " | \n",
+ " \n",
+ " 0\n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " Bool\n",
+ " | \n",
+ " \n",
+ " 0\n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " Date\n",
+ " | \n",
+ " \n",
+ " 0\n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " Missing\n",
+ " | \n",
+ " \n",
+ " 0\n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " Null\n",
+ " | \n",
+ " \n",
+ " 131\n",
+ " | \n",
+ "\n",
+ "
\n",
+ " \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " Basic Stats\n",
+ "
\n",
+ "\n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " Mean | \n",
+ " 13278.07855 | \n",
+ "
\n",
+ " \n",
+ " Minimum | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " Maximum | \n",
+ " 60000000.0 | \n",
+ "
\n",
+ " \n",
+ " Zeros(%) | \n",
+ " | \n",
+ "
\n",
+ "\n",
+ " \n",
+ "
\n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "
Frequency
\n",
+ "
\n",
+ "\n",
+ " \n",
+ " Value | \n",
+ " Count | \n",
+    "      Frequency (%) | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " 1.3 | \n",
+ " 171 | \n",
+ " 0.374% | \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " 1.2 | \n",
+ " 140 | \n",
+ " 0.306% | \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " 1.4 | \n",
+ " 138 | \n",
+ " 0.302% | \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " None | \n",
+ " 131 | \n",
+ " 0.287% | \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " 2.1 | \n",
+ " 130 | \n",
+ " 0.284% | \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " 2.4 | \n",
+ " 126 | \n",
+ " 0.276% | \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " 1.6 | \n",
+ " 120 | \n",
+ " 0.262% | \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " 0.5 | \n",
+ " 119 | \n",
+ " 0.26% | \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " 1.1 | \n",
+ " 116 | \n",
+ " 0.254% | \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " 3.8 | \n",
+ " 114 | \n",
+ " 0.249% | \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " \"Missing\" | \n",
+ " 0 | \n",
+ " 0.0% | \n",
+ "
\n",
+ "\n",
+ "
\n",
+ "
\n",
+ " \n",
+ "\n",
+ " \n",
+ "
\n",
+ "\n",
+ "\n",
+ "
Quantile statistics
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " Minimum | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 5-th percentile | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " Q1 | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " Median | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " Q3 | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " 95-th percentile | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " Maximum | \n",
+ " 60000000.0 | \n",
+ "
\n",
+ " \n",
+ " Range | \n",
+ " 60000000.0 | \n",
+ "
\n",
+ " \n",
+ " Interquartile range | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
Descriptive statistics
\n",
+ "
\n",
+ " \n",
+ " \n",
+ " Standard deviation | \n",
+ " 574988.87641 | \n",
+ "
\n",
+ " \n",
+ " Coef of variation | \n",
+ " 43.30362 | \n",
+ "
\n",
+ " \n",
+ " Kurtosis | \n",
+ " 6796.17061 | \n",
+ "
\n",
+ " \n",
+ " Mean | \n",
+ " 13278.07855 | \n",
+ "
\n",
+ " \n",
+ " MAD | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " Skewness | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " Sum | \n",
+ " 605281210.638 | \n",
+ "
\n",
+ " \n",
+ " Variance | \n",
+ " 330612207995.7785 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ " \n",
+ " \n",
+ "\n",
+ " \n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ " | \n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n",
+ "Viewing 10 of 45716 rows / 10 columns
\n",
+ "32 partition(s)
\n",
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " name \n",
+ " 1 (string) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " id \n",
+ " 2 (int) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " nametype \n",
+ " 3 (string) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " recclass \n",
+ " 4 (string) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " mass (g) \n",
+ " 5 (double) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " fall \n",
+ " 6 (string) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " year \n",
+ " 7 (string) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " reclat \n",
+ " 8 (double) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " reclong \n",
+ " 9 (double) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " GeoLocation \n",
+ " 10 (string) \n",
+ " \n",
+ " \n",
+ " nullable\n",
+ " \n",
+ " \n",
+ " | \n",
+ " \n",
+ "
\n",
+ "\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Acfer⋅232\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 240\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Valid\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " H5\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 725.0\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Found\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 01/01/1991⋅12:00:00⋅AM\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 27.73944\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 4.32833\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " (27.739440,⋅4.328330)\n",
+ " \n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Asuka⋅87197\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 2554\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Valid\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " H4\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 124.99\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Found\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 01/01/1987⋅12:00:00⋅AM\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " -72.0\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 26.0\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " (-72.000000,⋅26.000000)\n",
+ " \n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Gladstone⋅(iron)\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 10920\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Valid\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Iron,⋅IAB-MG\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 736600.0\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Found\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 01/01/1915⋅12:00:00⋅AM\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " -23.9\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 151.3\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " (-23.900000,⋅151.300000)\n",
+ " \n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Nullarbor⋅015\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 17955\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Valid\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " L6\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 3986.0\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Found\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 01/01/1980⋅12:00:00⋅AM\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Lewis⋅Cliff⋅86533\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 13461\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Valid\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " H5\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 15.7\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Found\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 01/01/1986⋅12:00:00⋅AM\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " -84.26756\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 161.3631\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " (-84.267560,⋅161.363100)\n",
+ " \n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Grove⋅Mountains⋅053589\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 48447\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Valid\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " L5\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 1.4\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Found\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 01/01/2006⋅12:00:00⋅AM\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " -72.7825\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 75.300278\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " (-72.782500,⋅75.300278)\n",
+ " \n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Sayh⋅al⋅Uhaymir⋅108\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 23300\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Valid\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " H6\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 16.0\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Found\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 01/01/2001⋅12:00:00⋅AM\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 21.06667\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 57.31667\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " (21.066670,⋅57.316670)\n",
+ " \n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Northwest⋅Africa⋅3088\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 31218\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Valid\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " L6\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 171.0\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Found\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 01/01/2003⋅12:00:00⋅AM\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " None\n",
+ " \n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Reckling⋅Peak⋅92423\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 22432\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Valid\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " L6\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 3.8\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Found\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 01/01/1992⋅12:00:00⋅AM\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " -76.22029\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 158.37967\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " (-76.220290,⋅158.379670)\n",
+ " \n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " \n",
+ " Sweetwater\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 23770\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Valid\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " H5\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 1760.0\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " Found\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 01/01/1961⋅12:00:00⋅AM\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " 32.55\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " -100.41667\n",
+ " \n",
+ " | \n",
+ " \n",
+ " \n",
+ " (32.550000,⋅-100.416670)\n",
+ " \n",
+ " | \n",
+ " \n",
+ "
\n",
+ " \n",
+ " \n",
+ "
\n",
+ "\n",
+ "\n",
+ "Viewing 10 of 45716 rows / 10 columns
\n",
+ "32 partition(s)
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "op.profiler.run(df, \"mass (g)\", infer=False)"
]
},
{
"cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Loading page (1/2)\n",
- "Rendering (2/2) \n",
- "Done \n"
- ]
- },
- {
- "data": {
- "text/html": [
- ""
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "new_df.table_image(\"images/table2.png\")"
- ]
- },
- {
- "cell_type": "markdown",
+ "execution_count": 41,
"metadata": {
- "lines_to_next_cell": 0
+ "scrolled": false
},
- "source": [
- "Note that you can use Optimus functions and Spark functions(`.WithColumn()`) and all the df function availables in a Spark Dataframe at the same time. To know about all the Optimus functionality please go to this [notebooks](examples/)\n",
- "\n",
- "### Custom functions\n",
- "Spark have multiple ways to transform your data like rdd, Column Expression ,udf and pandas udf. In Optimus we create the `apply()` and `apply_expr` which handle all the implementation complexity.\n",
- "\n",
- "Here you apply a function to the \"billingid\" column. Sum 1 and 2 to the current column value. All powered by Pandas UDF"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Loading page (1/2)\n",
- "Rendering (2/2) \n",
- "Done \n"
- ]
- },
- {
- "data": {
- "text/html": [
- ""
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "def func(value, args):\n",
- " return value + args[0] + args[1]\n",
- "\n",
- "df.cols.apply(\"height(ft)\",func,\"int\", [1,2]).table_image(\"images/table3.png\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "If you want to apply a Column Expression use `apply_expr()` like this. In this case we pasa an argument 10 to divide the actual column value"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading page (1/2)\n",
+ "[> ] 0%\r",
+ "[======> ] 10%\r",
+ "[===================> ] 32%\r",
+ "Warning: Failed to load file:///tmp/optimus/styles/styles.css (ignore)\n",
+ "[=====================> ] 36%\r",
+ "[=========================> ] 43%\r",
+ "[============================================================] 100%\r",
"Rendering (2/2) \n",
+ "[> ] 0%\r",
+ "[===============> ] 25%\r",
+ "[============================================================] 100%\r",
"Done \n"
]
},
{
"data": {
"text/html": [
- ""
+ ""
],
"text/plain": [
""
@@ -630,71 +1970,12 @@
}
],
"source": [
- "from pyspark.sql import functions as F\n",
- "\n",
- "def func(col_name, args):\n",
- " return F.col(col_name)/20\n",
- "\n",
- "df.cols.apply(\"height(ft)\", func=func, args=20).table_image(\"images/table4.png\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "You can change the table output back to ascii if you which"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [],
- "source": [
- "op.output(\"ascii\")"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "To return to HTML just:"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {},
- "outputs": [],
- "source": [
- "op.output(\"html\")"
- ]
- },
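Putting the two toggles together, a minimal sketch of switching renderers around a display call; the `df.table()` line is a hypothetical stand-in for whatever table output you trigger in between.

```python
# Sketch: switch to plain-text tables, display, then restore HTML.
op.output("ascii")   # tables print as plain text from here on
df.table()           # hypothetical display call; any table output applies
op.output("html")    # back to rich HTML rendering
```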
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## Data profiling\n",
- "\n",
- "Optimus comes with a powerful and unique data profiler. Besides basic and advance stats like min, max, kurtosis, mad etc, \n",
- "it also let you know what type of data has every column. For example if a string column have string, integer, float, bool, date Optimus can give you an unique overview about your data. \n",
- "Just run `df.profile(\"*\")` to profile all the columns. For more info about the profiler please go to this [notebook](examples/new-api-profiler.ipynb).\n",
- "\n",
- "Let's load a \"big\" dataset"
- ]
- },
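A compact sketch of the load-then-profile flow described above, using only calls that appear in this notebook (`op.load.csv`, `h_repartition`, `df.profile`):

```python
# Load the meteorite dataset and profile every column (sketch).
# h_repartition() is the Optimus helper used below to repartition the
# DataFrame sensibly before the relatively heavy profiling pass.
url = ("https://raw.githubusercontent.com/ironmussa/Optimus/"
       "master/examples/data/Meteorite_Landings.csv")
df = op.load.csv(url).h_repartition()
df.profile("*")  # profile all columns, as described above
```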
- {
- "cell_type": "code",
- "execution_count": 115,
- "metadata": {},
- "outputs": [],
- "source": [
- "df = op.load.csv(\"https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/Meteorite_Landings.csv\").h_repartition()"
+ "op.profiler.to_image(path=\"images/profiler_numeric.png\")"
]
},
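The two profiler calls used in this notebook compose naturally: run the profiler first, then persist the rendered report. A sketch, with an illustrative output path:

```python
# Run the profiler on one column, then save the rendered report as a PNG
# (sketch; the file name below is illustrative).
op.profiler.run(df, "mass (g)", infer=False)
op.profiler.to_image(path="images/profiler_numeric.png")
```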
{
"cell_type": "code",
- "execution_count": 116,
+ "execution_count": 30,
"metadata": {},
"outputs": [
{
@@ -739,7 +2020,7 @@
" \n",
" \n",
" Total size in memory | \n",
- " 44.6 MB | \n",
+ " -1 Bytes | \n",
"\n",
"
\n",
" \n",
@@ -983,7 +2264,7 @@
" \n",
"\n",
" \n",
- " \n",
+ " \n",
" \n",
" | \n",
" \n",
@@ -992,7 +2273,7 @@
" \n",
" \n",
" \n",
- " \n",
+ " \n",
" \n",
"\n",
" | \n",
@@ -1784,7 +3065,7 @@
},
{
"cell_type": "code",
- "execution_count": 127,
+ "execution_count": 35,
"metadata": {},
"outputs": [
{
@@ -1792,8 +3073,17 @@
"output_type": "stream",
"text": [
"Loading page (1/2)\n",
- "Warning: Failed to load file:///C:/Users/ARGENI~1/AppData/Local/Temp/optimus/styles/styles.css (ignore)\n",
+ "[> ] 0%\r",
+ "[======> ] 10%\r",
+ "[===================> ] 33%\r",
+ "Warning: Failed to load file:///tmp/optimus/styles/styles.css (ignore)\n",
+ "[======================> ] 37%\r",
+ "[==========================> ] 44%\r",
+ "[============================================================] 100%\r",
"Rendering (2/2) \n",
+ "[> ] 0%\r",
+ "[===============> ] 25%\r",
+ "[============================================================] 100%\r",
"Done \n"
]
},
@@ -1825,7 +3115,7 @@
},
{
"cell_type": "code",
- "execution_count": 129,
+ "execution_count": 36,
"metadata": {},
"outputs": [
{
@@ -1869,7 +3159,7 @@
"
\n",
" \n",
" Total size in memory | \n",
- " 41.7 MB | \n",
+ " -1 Bytes | \n",
"\n",
"
\n",
" \n",
@@ -2112,7 +3402,7 @@
" \n",
" \n",
" \n",
- " \n",
+ " \n",
" \n",
"\n",
" | \n",
@@ -2123,7 +3413,7 @@
"
\n",
" \n",
" \n",
- " \n",
+ " \n",
" \n",
"\n",
" | \n",
@@ -2134,7 +3424,7 @@
"
\n",
" \n",
" \n",
- " \n",
+ " \n",
" \n",
"\n",
" | \n",
@@ -2145,7 +3435,7 @@
"
\n",
" \n",
" \n",
- " \n",
+ " \n",
" \n",
"\n",
" | \n",
@@ -2156,7 +3446,7 @@
"
\n",
" \n",
" \n",
- " \n",
+ " \n",
" \n",
"\n",
" | \n",
@@ -2167,7 +3457,7 @@
"
\n",
" \n",
" \n",
- " \n",
+ " \n",
" \n",
"\n",
" | \n",
@@ -2954,7 +4244,7 @@
},
{
"cell_type": "code",
- "execution_count": 130,
+ "execution_count": 37,
"metadata": {},
"outputs": [
{
@@ -2962,8 +4252,19 @@
"output_type": "stream",
"text": [
"Loading page (1/2)\n",
- "Warning: Failed to load file:///C:/Users/ARGENI~1/AppData/Local/Temp/optimus/styles/styles.css (ignore)\n",
+ "[> ] 0%\r",
+ "[======> ] 10%\r",
+ "[==================> ] 31%\r",
+ "Warning: Failed to load file:///tmp/optimus/styles/styles.css (ignore)\n",
+ "[=====================> ] 36%\r",
+ "[=======================> ] 39%\r",
+ "[========================> ] 41%\r",
+ "[===========================> ] 45%\r",
+ "[============================================================] 100%\r",
"Rendering (2/2) \n",
+ "[> ] 0%\r",
+ "[===============> ] 25%\r",
+ "[============================================================] 100%\r",
"Done \n"
]
},
@@ -2994,7 +4295,7 @@
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
@@ -3004,7 +4305,7 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
@@ -3013,7 +4314,7 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
@@ -3022,7 +4323,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": 45,
"metadata": {},
"outputs": [],
"source": [
@@ -3031,14 +4332,14 @@
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": 46,
"metadata": {
"lines_to_next_cell": 0
},
"outputs": [
{
"data": {
- "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEICAYAAABPgw/pAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvDW2N/gAAD11JREFUeJzt3X+s3XV9x/Hn695eUqhiS7kaVn5ctrJZ6TI1N0ZLN20x2SzL0EyDbFNm7tI0hsrWLbPu/qHL1qUms4506yZ4XUhmSxVFiDQq02JScMTbwRxw56gUC1j1GkFZoXrbfvbHPSVFW++5595zT/s5z0fS3Hu+5/vtefefZ7/5nHO+35RSkCSd+Xo6PYAkaXYYdEmqhEGXpEoYdEmqhEGXpEoYdEmqhEGXpEoYdEmqhEGXpEoYdFUvycYk30rybJJHkrytsb03yUeS/CDJ/iTXJylJ5jWef1mSkSQHkzyV5G+T9Hb2XyOd2rxODyDNgW8Bvwl8F3gH8G9JlgJXA28BXg0cAj79M8fdAnwPWAosAD4PPAF8bG7GlqYnXstF3SbJg8AHgRuAnaWUjzW2vxm4G+gDFgMHgIWllOcbz18LrC2lrOrI4NIUPENX9ZK8G9gADDQ2vQQ4H/glJs+4jzvx90uYDPvBJMe39fzMPtJpxaCrakkuAW4GrgS+Vko52jhDD3AQuPCE3S864fcngJ8A55dSjszVvNJM+KaoarcAKMA4QJL3AMsbz30KuCHJkiQLgfcfP6iUchD4EvCRJOcm6UnyK0neOLfjS80z6KpaKeUR4CPA15h8g/PXgXsbT9/MZLS/ATwA7AKOAEcbz78bOAt4BHgauA24YK5ml6bLN0WlhiRvAf6llHJJp2eRWuEZurpWkrOTrEkyL8kSJj/5cnun55Ja5Rm6ulaSc4CvAq8EngfuAm4opfy4o4NJLTLoklQJl1wkqRJz+jn0888/vwwMDMzlS0rSGW/v3r0/KKX0T7XfnAZ9YGCA0dHRuXxJSTrjJfl2M/u55CJJlTDoklQJgy5JlTDoklQJgy5JlWgq6En+LMnDSR5KsiPJ/CSXJrk/yaNJdiY5q93DSrNtx44dLF++nN7eXpYvX86OHTs6PZLUsimD3rjGxfuAwVLKcqAXeCfwYeCjpZTLmLwS3VA7B5Vm244dOxgeHmbr1q0cPnyYrVu3Mjw8bNR1xmp2yWUecHbj5rnnMHljgNVMXk4UJu+9+NbZH09qn02bNjEyMsKqVavo6+tj1apVjIyMsGnTpk6PJrVkyqCXUp4C/p7J+yseBH4E7AWeOeFOLk8CS052fJK1SUaTjI6Pj8/O1NIsGBsbY+XKlS/atnLlSsbGxjo0kTQzzSy5LGLy7uiXMnkPxgVM3in9Z530Kl+llJtKKYOllMH+/im/uSrNmWXLlrFnz54XbduzZw/Lli3r0ETSzDSz5PJmYH8pZbyUMgF8FlgBLGwswcDkfRm/06YZpbYYHh5maGiI3bt3MzExwe7duxkaGmJ4eLjTo0ktaeZaLgeA1zeuHf08kzfbHQV2A28HbgWuA+5o15BSO1x77bUArF+/nrGxMZYtW8amTZte2C6daZq6HnqSvwauYfJ+iw8Af8LkmvmtwHmNbX9USvnJL/p7BgcHixfnkqTpSbK3lDI41X5NXW2xlPJBJm/PdaLHgNe1MJskqQ38pqi6ml8sUk3m9Hro0unk+BeLRkZGWLlyJXv27GFoaPL7ca6j60w0p/cUdQ1dp5Ply5ezdetWVq1a9cK23bt3s379eh566KEOTia9WLNr6AZdXau3t5fDhw/T19f3wraJiQnmz5/P0aNHOziZ9GLNBt01dHUtv1ik2riGrq41PDzMNddcw4IFCzhw4AAXX3wxhw4d4sYbb+z0aFJLPEOXgLlcepTaxaCra23atImdO3eyf/9+jh07xv79+9m5c6dXW9QZyzdF1bV8U1RnCt8Ulabgm6KqjUFX1/Jqi6qNn3JR1/Jqi6qNZ+jqavfddx/79u3j2LFj7Nu3j/vuu6/TI0ktM+jqWuvXr2fbtm0sWrSInp4eFi1axLZt21i/fn2nR5Na4qdc1LX6+vro7e3l2LFjTExM0NfXR09PD0ePHmViYqLT40kv8FMu0hSOHDnCxMQEmzdv5tChQ2zevJmJiQmOHDky9cHSacigq6utWbOGDRs2cM4557BhwwbWrFnT6ZGklhl0dbVdu3axZcsWnnvuObZs2cKuXbs6PZLUMtfQ1bVcQ9eZwjV0aQrr1q1jYmKCxYsX09PTw+LFi5mYmGDdunWdHk1qiV8sUtfaunUrADfffDPHjh3j6aef5r3vfe8L26UzjWfo6morVqxg6dKl9PT0sHTpUlasWNHpkaSWeYauruVNolUb3xRV1/Im0TpTeJNoaQpeD11nCj/lIk3B66GrNq6hq0pJmtpv9erVMzree5HqdOIZuqpUSmnqz/bt27n88sshPVx++eVs37696WONuU43rqFLwMDGu3h881WdHkM6KdfQJanLGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqkRTQU+yMMltSf4nyViSNyQ5L8ndSR5t/FzU7mElSafW7Bn6jcAXSimvBH4DGAM2Al8upVwGfLnxWJLUIVMGPcm5wG8BIwCllJ+WUp4BrgZuaex2C/DWdg0pSZpaM2fovwyMA/+a5IEkH0+yAHhFKeUgQOPny092cJK1SUaTjI6Pj8/a4JKkF2sm6POA1wL/XEp5DXCIaSyvlFJuKqUMllIG+/v7WxxTkjSVZoL+JPBkKeX+xuPbmAz895JcAND4+f32jChJasaUQS+lfBd4IsmvNTZdCTwC3Alc19h2HXBHWyaUJDWl2TsWrQc+meQs4DHgPUz+Z/CpJEPAAeAd7RlRktSMpoJeSnkQONnF1a+c3XEkSa3ym6KSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVImmg56kN8kDST7feHxpkvuTPJpkZ5Kz2jemJGkq0zlDvwEYO+Hxh4GPllIuA54GhmZzMEnS9DQV9CQXAlcBH288DrAauK2xyy3AW9sxoCSpOc2eof8D8JfAscbjxcAzpZQjjcdPAktOdmCStUlGk4yOj4/PaFhJ0qlNGfQkvwt8v5Sy98TNJ9m1nOz4UspNpZTBUspgf39/i2NKkqYyr4l9rgB+L8kaYD5wLpNn7AuTzGucpV8IfKd9Y6qbXbH5Kzz1zPNtf52BjXe19e9fsvBs7t24uq2voe42ZdBLKR8APgCQ5E3AX5RS/jDJp4G3A7cC1wF3tHFOdbGnnnmexzdf1ekxZqzd/2FIM/kc+vuBDUn2Mbm
mPjI7I0mSWtHMkssLSin3APc0fn8MeN3sjyRJaoXfFJWkShh0SaqEQZekShh0SaqEQZekShh0SaqEQZekShh0SaqEQZekShh0SaqEQZekShh0SaqEQZekShh0SaqEQZekShh0SaqEQZekShh0SaqEQZekShh0SaqEQZekShh0SarEvE4PIE3l8fl/AB/q9BQz9/h8gB91egxVzKDrtDdweDuPb76q02PM2MDGu3i800Ooai65SFIlDLokVcKgS1IlDLokVcKgS1IlDLokVcKgS1IlDLokVcKgS1IlDLokVcKgS1IlDLokVcKgS1Ilpgx6kouS7E4yluThJDc0tp+X5O4kjzZ+Lmr/uJKkU2nm8rlHgD8vpfxnkpcCe5PcDfwx8OVSyuYkG4GNwPvbN6q61ZKFZzOw8a5OjzFjSxae3ekRVLkpg15KOQgcbPz+bJIxYAlwNfCmxm63APdg0NUG925c3fbXGNh4VxXXXFd3m9YaepIB4DXA/cArGrE/Hv2Xn+KYtUlGk4yOj4/PbFpJ0ik1HfQkLwE+A/xpKeXHzR5XSrmplDJYShns7+9vZUZJUhOaCnqSPiZj/slSymcbm7+X5ILG8xcA32/PiJKkZjTzKZcAI8BYKWXLCU/dCVzX+P064I7ZH0+S1KxmPuVyBfAu4L+TPNjY9lfAZuBTSYaAA8A72jOiJKkZzXzKZQ+QUzx95eyOI0lqld8UlaRKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKzCjoSX4nyTeT7EuycbaGkiRNX8tBT9IL/BPwFuBVwLVJXjVbg0mSpmcmZ+ivA/aVUh4rpfwUuBW4enbGkiRN10yCvgR44oTHTza2vUiStUlGk4yOj4/P4OUkSb/ITIKek2wrP7ehlJtKKYOllMH+/v4ZvJwk6ReZSdCfBC464fGFwHdmNo4kqVUzCfrXgcuSXJrkLOCdwJ2zM5YkabrmtXpgKeVIkuuBLwK9wCdKKQ/P2mSSpGlpOegApZRdwK5ZmkWSNAN+U1SSKmHQJakSBl2SKmHQJakSBl2SKmHQJakSBl2SKmHQJakSM/pikXS6Sk527bgpjvnw9F+nlJ+7Hp3UMQZdVTK06kYuuUhSJQy6JFXCoEtSJQy6JFXCoEtSJQy6JFXCoEtSJQy6JFUic/kFjCTjwLfn7AWl5p0P/KDTQ0incEkppX+qneY06NLpKsloKWWw03NIM+GSiyRVwqBLUiUMujTppk4PIM2Ua+iSVAnP0CWpEgZdkiph0CWpEgZdkiph0NU1knwuyd4kDydZ29g2lOR/k9yT5OYk/9jY3p/kM0m+3vhzRWenl6bmp1zUNZKcV0r5YZKzga8Dvw3cC7wWeBb4CvBfpZTrk2wHtpVS9iS5GPhiKWVZx4aXmuBNotVN3pfkbY3fLwLeBXy1lPJDgCSfBn618fybgVclOX7suUleWkp5di4HlqbDoKsrJHkTk5F+QynluST3AN8ETnXW3dPY9/m5mVCaOdfQ1S1eBjzdiPkrgdcD5wBvTLIoyTzg90/Y/0vA9ccfJHn1nE4rtcCgq1t8AZiX5BvA3wD/ATwF/B1wP/DvwCPAjxr7vw8YTPKNJI8A6+Z+ZGl6fFNUXS3JS0op/9c4Q78d+EQp5fZOzyW1wjN0dbsPJXkQeAjYD3yuw/NILfMMXZIq4Rm6JFXCoEtSJQy6JFXCoEtSJQy6JFXi/wHno0/sqV6NfgAAAABJRU5ErkJggg==\n",
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAEICAYAAABPgw/pAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAD11JREFUeJzt3X+s3XV9x/Hn695eUqhiS7kaVn5ctrJZ6TI1N0ZLN20x2SzL0EyDbFNm7tI0hsrWLbPu/qHL1qUms4506yZ4XUhmSxVFiDQq02JScMTbwRxw56gUC1j1GkFZoXrbfvbHPSVFW++5595zT/s5z0fS3Hu+5/vtefefZ7/5nHO+35RSkCSd+Xo6PYAkaXYYdEmqhEGXpEoYdEmqhEGXpEoYdEmqhEGXpEoYdEmqhEGXpEoYdFUvycYk30rybJJHkrytsb03yUeS/CDJ/iTXJylJ5jWef1mSkSQHkzyV5G+T9Hb2XyOd2rxODyDNgW8Bvwl8F3gH8G9JlgJXA28BXg0cAj79M8fdAnwPWAosAD4PPAF8bG7GlqYnXstF3SbJg8AHgRuAnaWUjzW2vxm4G+gDFgMHgIWllOcbz18LrC2lrOrI4NIUPENX9ZK8G9gADDQ2vQQ4H/glJs+4jzvx90uYDPvBJMe39fzMPtJpxaCrakkuAW4GrgS+Vko52jhDD3AQuPCE3S864fcngJ8A55dSjszVvNJM+KaoarcAKMA4QJL3AMsbz30KuCHJkiQLgfcfP6iUchD4EvCRJOcm6UnyK0neOLfjS80z6KpaKeUR4CPA15h8g/PXgXsbT9/MZLS/ATwA7AKOAEcbz78bOAt4BHgauA24YK5ml6bLN0WlhiRvAf6llHJJp2eRWuEZurpWkrOTrEkyL8kSJj/5cnun55Ja5Rm6ulaSc4CvAq8EngfuAm4opfy4o4NJLTLoklQJl1wkqRJz+jn0888/vwwMDMzlS0rSGW/v3r0/KKX0T7XfnAZ9YGCA0dHRuXxJSTrjJfl2M/u55CJJlTDoklQJgy5JlTDoklQJgy5JlWgq6En+LMnDSR5KsiPJ/CSXJrk/yaNJdiY5q93DSrNtx44dLF++nN7eXpYvX86OHTs6PZLUsimD3rjGxfuAwVLKcqAXeCfwYeCjpZTLmLwS3VA7B5Vm244dOxgeHmbr1q0cPnyYrVu3Mjw8bNR1xmp2yWUecHbj5rnnMHljgNVMXk4UJu+9+NbZH09qn02bNjEyMsKqVavo6+tj1apVjIyMsGnTpk6PJrVkyqCXUp4C/p7J+yseBH4E7AWeOeFOLk8CS052fJK1SUaTjI6Pj8/O1NIsGBsbY+XKlS/atnLlSsbGxjo0kTQzzSy5LGLy7uiXMnkPxgVM3in9Z530Kl+llJtKKYOllMH+/im/uSrNmWXLlrFnz54XbduzZw/Lli3r0ETSzDSz5PJmYH8pZbyUMgF8FlgBLGwswcDkfRm/06YZpbYYHh5maGiI3bt3MzExwe7duxkaGmJ4eLjTo0ktaeZaLgeA1zeuHf08kzfbHQV2A28HbgWuA+5o15BSO1x77bUArF+/nrGxMZYtW8amTZte2C6daZq6HnqSvwauYfJ+iw8Af8LkmvmtwHmNbX9USvnJL/p7BgcHixfnkqTpSbK3lDI41X5NXW2xlPJBJm/PdaLHgNe1MJskqQ38pqi6ml8sUk3m9Hro0unk+BeLRkZGWLlyJXv27GFoaPL7ca6j60w0p/cUdQ1dp5Ply5ezdetWVq1a9cK23bt3s379eh566KEOTia9WLNr6AZdXau3t5fDhw/T19f3wraJiQnmz5/P0aNHOziZ9GLNBt01dHUtv1ik2riGrq41PDzMNddcw4IFCzhw4AAXX3wxhw4d4sYbb+z0aFJLPEOXgLlcepTaxaCra23atImdO3eyf/9+jh07xv79+9m5c6dXW9QZyzdF1bV8U1RnCt8Ulabgm6KqjUFX1/Jqi6qNn3JR1/Jqi6qNZ+jqavfddx/79u3j2LFj7Nu3j/vuu6/TI0ktM+jqWuvXr2fbtm0sWrSInp4eFi1axLZt21i/fn2nR5Na4qdc1LX6+vro7e3l2LFjTExM0NfXR09PD0ePHmViYqLT40kv8FMu0hSOHDnCxMQEmzdv5tChQ2zevJmJiQmOHDky9cHSacigq6utWbOGDRs2cM4557BhwwbWrFnT6ZGklhl0dbVdu3axZcsWnnvuObZs2cKuXbs6PZLUMtfQ1bVcQ9eZwjV0aQrr1q1jYmKCxYsX09PTw+LFi5mYmGDdunWdHk1qiV8sUtfaunUrADfffDPHjh3j6aef5r3vfe8L26UzjWfo6morVqxg6dKl9PT0sHTpUlasWNHpkaSWeYauruVNolUb3xRV1/Im0TpTeJNoaQpeD11nCj/lIk3B66GrNq6hq0pJmtpv9erVMzree5HqdOIZuqpUSmnqz/bt27n88sshPVx++eVs37696WONuU43rqFLwMDGu3h881WdHkM6KdfQJanLGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqkRTQU+yMMltSf4nyViSNyQ5L8ndSR5t/FzU7mElSafW7Bn6jcAXSimvBH4DGAM2Al8upVwGfLnxWJLUIVMGPcm5wG8BIwCllJ+WUp4BrgZuaex2C/DWdg0pSZpaM2fovwyMA/+a5IEkH0+yAHhFKeUgQOPny092cJK1SUaTjI6Pj8/a4JKkF2sm6POA1wL/XEp5DXCIaSyvlFJuKqUMllIG+/v7WxxTkjSVZoL+JPBkKeX+xuPbmAz895JcAND4+f32jChJasaUQS+lfBd4IsmvNTZdCTwC3Alc19h2HXBHWyaUJDWl2TsWrQc+meQs4DHgPUz+Z/CpJEPAAeAd7RlRktSMpoJeSnkQONnF1a+c3XEkSa3ym6KSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVAmDLkmVMOiSVImmg56kN8kDST7feHxpkvuTPJpkZ5Kz2jemJGkq0zlDvwEYO+Hxh4GPllIuA54GhmZzMEnS9DQV9CQXAlcBH288DrAauK2xyy3AW9sxoCSpOc2eof8D8JfAscbjxcAzpZQjjcdPAktOdmCStUlGk4yOj4/PaFhJ0qlNGfQkvwt8v5Sy98TNJ9m1nOz4UspNpZTBUspgf39/i2NKkqYyr4l9rgB+L8kaYD5wLpNn7AuTzGucpV8IfKd9Y6qbXbH5Kzz1zPNtf52BjXe19e9fsvBs7t24uq2voe42ZdBLKR8APgCQ5E3AX5RS/jDJp4G3A7cC1wF3tHFOdbGnnnmexzdf1ekxZqzd/2FIM/kc+vuBDUn2Mbm
mPjI7I0mSWtHMkssLSin3APc0fn8MeN3sjyRJaoXfFJWkShh0SaqEQZekShh0SaqEQZekShh0SaqEQZekShh0SaqEQZekShh0SaqEQZekShh0SaqEQZekShh0SaqEQZekShh0SaqEQZekShh0SaqEQZekShh0SaqEQZekShh0SarEvE4PIE3l8fl/AB/q9BQz9/h8gB91egxVzKDrtDdweDuPb76q02PM2MDGu3i800Ooai65SFIlDLokVcKgS1IlDLokVcKgS1IlDLokVcKgS1IlDLokVcKgS1IlDLokVcKgS1IlDLokVcKgS1Ilpgx6kouS7E4yluThJDc0tp+X5O4kjzZ+Lmr/uJKkU2nm8rlHgD8vpfxnkpcCe5PcDfwx8OVSyuYkG4GNwPvbN6q61ZKFZzOw8a5OjzFjSxae3ekRVLkpg15KOQgcbPz+bJIxYAlwNfCmxm63APdg0NUG925c3fbXGNh4VxXXXFd3m9YaepIB4DXA/cArGrE/Hv2Xn+KYtUlGk4yOj4/PbFpJ0ik1HfQkLwE+A/xpKeXHzR5XSrmplDJYShns7+9vZUZJUhOaCnqSPiZj/slSymcbm7+X5ILG8xcA32/PiJKkZjTzKZcAI8BYKWXLCU/dCVzX+P064I7ZH0+S1KxmPuVyBfAu4L+TPNjY9lfAZuBTSYaAA8A72jOiJKkZzXzKZQ+QUzx95eyOI0lqld8UlaRKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKGHRJqoRBl6RKzCjoSX4nyTeT7EuycbaGkiRNX8tBT9IL/BPwFuBVwLVJXjVbg0mSpmcmZ+ivA/aVUh4rpfwUuBW4enbGkiRN10yCvgR44oTHTza2vUiStUlGk4yOj4/P4OUkSb/ITIKek2wrP7ehlJtKKYOllMH+/v4ZvJwk6ReZSdCfBC464fGFwHdmNo4kqVUzCfrXgcuSXJrkLOCdwJ2zM5YkabrmtXpgKeVIkuuBLwK9wCdKKQ/P2mSSpGlpOegApZRdwK5ZmkWSNAN+U1SSKmHQJakSBl2SKmHQJakSBl2SKmHQJakSBl2SKmHQJakSM/pikXS6Sk527bgpjvnw9F+nlJ+7Hp3UMQZdVTK06kYuuUhSJQy6JFXCoEtSJQy6JFXCoEtSJQy6JFXCoEtSJQy6JFUic/kFjCTjwLfn7AWl5p0P/KDTQ0incEkppX+qneY06NLpKsloKWWw03NIM+GSiyRVwqBLUiUMujTppk4PIM2Ua+iSVAnP0CWpEgZdkiph0CWpEgZdkiph0NU1knwuyd4kDydZ29g2lOR/k9yT5OYk/9jY3p/kM0m+3vhzRWenl6bmp1zUNZKcV0r5YZKzga8Dvw3cC7wWeBb4CvBfpZTrk2wHtpVS9iS5GPhiKWVZx4aXmuBNotVN3pfkbY3fLwLeBXy1lPJDgCSfBn618fybgVclOX7suUleWkp5di4HlqbDoKsrJHkTk5F+QynluST3AN8ETnXW3dPY9/m5mVCaOdfQ1S1eBjzdiPkrgdcD5wBvTLIoyTzg90/Y/0vA9ccfJHn1nE4rtcCgq1t8AZiX5BvA3wD/ATwF/B1wP/DvwCPAjxr7vw8YTPKNJI8A6+Z+ZGl6fFNUXS3JS0op/9c4Q78d+EQp5fZOzyW1wjN0dbsPJXkQeAjYD3yuw/NILfMMXZIq4Rm6JFXCoEtSJQy6JFXCoEtSJQy6JFXi/wHno0/sqV6NfgAAAABJRU5ErkJggg==\n",
"text/plain": [
"