\n",
+ "
\n",
+ "\n",
"
\n",
" \n",
" \n",
" | \n",
" name | \n",
" population | \n",
- " info.governor | \n",
" state | \n",
" shortname | \n",
+ " info.governor | \n",
"
\n",
" \n",
" \n",
@@ -172,63 +188,63 @@
" 0 | \n",
" Dade | \n",
" 12345 | \n",
- " Rick Scott | \n",
" Florida | \n",
" FL | \n",
+ " Rick Scott | \n",
" \n",
" \n",
" | 1 | \n",
" Broward | \n",
" 40000 | \n",
- " Rick Scott | \n",
" Florida | \n",
" FL | \n",
+ " Rick Scott | \n",
"
\n",
" \n",
" | 2 | \n",
" Palm Beach | \n",
" 60000 | \n",
- " Rick Scott | \n",
" Florida | \n",
" FL | \n",
+ " Rick Scott | \n",
"
\n",
" \n",
" | 3 | \n",
" Summit | \n",
" 1234 | \n",
- " John Kasich | \n",
" Ohio | \n",
" OH | \n",
+ " John Kasich | \n",
"
\n",
" \n",
" | 4 | \n",
" Cuyahoga | \n",
" 1337 | \n",
- " John Kasich | \n",
" Ohio | \n",
" OH | \n",
+ " John Kasich | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
- " name population info.governor state shortname\n",
- "0 Dade 12345 Rick Scott Florida FL\n",
- "1 Broward 40000 Rick Scott Florida FL\n",
- "2 Palm Beach 60000 Rick Scott Florida FL\n",
- "3 Summit 1234 John Kasich Ohio OH\n",
- "4 Cuyahoga 1337 John Kasich Ohio OH"
+ " name population state shortname info.governor\n",
+ "0 Dade 12345 Florida FL Rick Scott\n",
+ "1 Broward 40000 Florida FL Rick Scott\n",
+ "2 Palm Beach 60000 Florida FL Rick Scott\n",
+ "3 Summit 1234 Ohio OH John Kasich\n",
+ "4 Cuyahoga 1337 Ohio OH John Kasich"
]
},
- "execution_count": 8,
+ "execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# further populate tables created from nested element\n",
- "json_normalize(data, 'counties', ['state', 'shortname', ['info', 'governor']])"
+ "pd.json_normalize(data, 'counties', ['state', 'shortname', ['info', 'governor']])"
]
},
{
@@ -245,182 +261,178 @@
},
{
"cell_type": "code",
- "execution_count": 9,
- "metadata": {
- "collapsed": false
- },
+ "execution_count": 11,
+ "metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "[{u'_id': {u'$oid': u'52b213b38594d8a2be17c780'},\n",
- " u'approvalfy': 1999,\n",
- " u'board_approval_month': u'November',\n",
- " u'boardapprovaldate': u'2013-11-12T00:00:00Z',\n",
- " u'borrower': u'FEDERAL DEMOCRATIC REPUBLIC OF ETHIOPIA',\n",
- " u'closingdate': u'2018-07-07T00:00:00Z',\n",
- " u'country_namecode': u'Federal Democratic Republic of Ethiopia!$!ET',\n",
- " u'countrycode': u'ET',\n",
- " u'countryname': u'Federal Democratic Republic of Ethiopia',\n",
- " u'countryshortname': u'Ethiopia',\n",
- " u'docty': u'Project Information Document,Indigenous Peoples Plan,Project Information Document',\n",
- " u'envassesmentcategorycode': u'C',\n",
- " u'grantamt': 0,\n",
- " u'ibrdcommamt': 0,\n",
- " u'id': u'P129828',\n",
- " u'idacommamt': 130000000,\n",
- " u'impagency': u'MINISTRY OF EDUCATION',\n",
- " u'lendinginstr': u'Investment Project Financing',\n",
- " u'lendinginstrtype': u'IN',\n",
- " u'lendprojectcost': 550000000,\n",
- " u'majorsector_percent': [{u'Name': u'Education', u'Percent': 46},\n",
- " {u'Name': u'Education', u'Percent': 26},\n",
- " {u'Name': u'Public Administration, Law, and Justice', u'Percent': 16},\n",
- " {u'Name': u'Education', u'Percent': 12}],\n",
- " u'mjsector_namecode': [{u'code': u'EX', u'name': u'Education'},\n",
- " {u'code': u'EX', u'name': u'Education'},\n",
- " {u'code': u'BX', u'name': u'Public Administration, Law, and Justice'},\n",
- " {u'code': u'EX', u'name': u'Education'}],\n",
- " u'mjtheme': [u'Human development'],\n",
- " u'mjtheme_namecode': [{u'code': u'8', u'name': u'Human development'},\n",
- " {u'code': u'11', u'name': u''}],\n",
- " u'mjthemecode': u'8,11',\n",
- " u'prodline': u'PE',\n",
- " u'prodlinetext': u'IBRD/IDA',\n",
- " u'productlinetype': u'L',\n",
- " u'project_abstract': {u'cdata': u'The development objective of the Second Phase of General Education Quality Improvement Project for Ethiopia is to improve learning conditions in primary and secondary schools and strengthen institutions at different levels of educational administration. The project has six components. The first component is curriculum, textbooks, assessment, examinations, and inspection. This component will support improvement of learning conditions in grades KG-12 by providing increased access to teaching and learning materials and through improvements to the curriculum by assessing the strengths and weaknesses of the current curriculum. This component has following four sub-components: (i) curriculum reform and implementation; (ii) teaching and learning materials; (iii) assessment and examinations; and (iv) inspection. The second component is teacher development program (TDP). This component will support improvements in learning conditions in both primary and secondary schools by advancing the quality of teaching in general education through: (a) enhancing the training of pre-service teachers in teacher education institutions; and (b) improving the quality of in-service teacher training. This component has following three sub-components: (i) pre-service teacher training; (ii) in-service teacher training; and (iii) licensing and relicensing of teachers and school leaders. The third component is school improvement plan. This component will support the strengthening of school planning in order to improve learning outcomes, and to partly fund the school improvement plans through school grants. It has following two sub-components: (i) school improvement plan; and (ii) school grants. The fourth component is management and capacity building, including education management information systems (EMIS). This component will support management and capacity building aspect of the project. 
This component has following three sub-components: (i) capacity building for education planning and management; (ii) capacity building for school planning and management; and (iii) EMIS. The fifth component is improving the quality of learning and teaching in secondary schools and universities through the use of information and communications technology (ICT). It has following five sub-components: (i) national policy and institution for ICT in general education; (ii) national ICT infrastructure improvement plan for general education; (iii) develop an integrated monitoring, evaluation, and learning system specifically for the ICT component; (iv) teacher professional development in the use of ICT; and (v) provision of limited number of e-Braille display readers with the possibility to scale up to all secondary education schools based on the successful implementation and usage of the readers. The sixth component is program coordination, monitoring and evaluation, and communication. It will support institutional strengthening by developing capacities in all aspects of program coordination, monitoring and evaluation; a new sub-component on communications will support information sharing for better management and accountability. It has following three sub-components: (i) program coordination; (ii) monitoring and evaluation (M and E); and (iii) communication.'},\n",
- " u'project_name': u'Ethiopia General Education Quality Improvement Project II',\n",
- " u'projectdocs': [{u'DocDate': u'28-AUG-2013',\n",
- " u'DocType': u'PID',\n",
- " u'DocTypeDesc': u'Project Information Document (PID), Vol.',\n",
- " u'DocURL': u'http://www-wds.worldbank.org/servlet/WDSServlet?pcont=details&eid=090224b081e545fb_1_0',\n",
- " u'EntityID': u'090224b081e545fb_1_0'},\n",
- " {u'DocDate': u'01-JUL-2013',\n",
- " u'DocType': u'IP',\n",
- " u'DocTypeDesc': u'Indigenous Peoples Plan (IP), Vol.1 of 1',\n",
- " u'DocURL': u'http://www-wds.worldbank.org/servlet/WDSServlet?pcont=details&eid=000442464_20130920111729',\n",
- " u'EntityID': u'000442464_20130920111729'},\n",
- " {u'DocDate': u'22-NOV-2012',\n",
- " u'DocType': u'PID',\n",
- " u'DocTypeDesc': u'Project Information Document (PID), Vol.',\n",
- " u'DocURL': u'http://www-wds.worldbank.org/servlet/WDSServlet?pcont=details&eid=090224b0817b19e2_1_0',\n",
- " u'EntityID': u'090224b0817b19e2_1_0'}],\n",
- " u'projectfinancialtype': u'IDA',\n",
- " u'projectstatusdisplay': u'Active',\n",
- " u'regionname': u'Africa',\n",
- " u'sector': [{u'Name': u'Primary education'},\n",
- " {u'Name': u'Secondary education'},\n",
- " {u'Name': u'Public administration- Other social services'},\n",
- " {u'Name': u'Tertiary education'}],\n",
- " u'sector1': {u'Name': u'Primary education', u'Percent': 46},\n",
- " u'sector2': {u'Name': u'Secondary education', u'Percent': 26},\n",
- " u'sector3': {u'Name': u'Public administration- Other social services',\n",
- " u'Percent': 16},\n",
- " u'sector4': {u'Name': u'Tertiary education', u'Percent': 12},\n",
- " u'sector_namecode': [{u'code': u'EP', u'name': u'Primary education'},\n",
- " {u'code': u'ES', u'name': u'Secondary education'},\n",
- " {u'code': u'BS', u'name': u'Public administration- Other social services'},\n",
- " {u'code': u'ET', u'name': u'Tertiary education'}],\n",
- " u'sectorcode': u'ET,BS,ES,EP',\n",
- " u'source': u'IBRD',\n",
- " u'status': u'Active',\n",
- " u'supplementprojectflg': u'N',\n",
- " u'theme1': {u'Name': u'Education for all', u'Percent': 100},\n",
- " u'theme_namecode': [{u'code': u'65', u'name': u'Education for all'}],\n",
- " u'themecode': u'65',\n",
- " u'totalamt': 130000000,\n",
- " u'totalcommamt': 130000000,\n",
- " u'url': u'http://www.worldbank.org/projects/P129828/ethiopia-general-education-quality-improvement-project-ii?lang=en'},\n",
- " {u'_id': {u'$oid': u'52b213b38594d8a2be17c781'},\n",
- " u'approvalfy': 2015,\n",
- " u'board_approval_month': u'November',\n",
- " u'boardapprovaldate': u'2013-11-04T00:00:00Z',\n",
- " u'borrower': u'GOVERNMENT OF TUNISIA',\n",
- " u'country_namecode': u'Republic of Tunisia!$!TN',\n",
- " u'countrycode': u'TN',\n",
- " u'countryname': u'Republic of Tunisia',\n",
- " u'countryshortname': u'Tunisia',\n",
- " u'docty': u'Project Information Document,Integrated Safeguards Data Sheet,Integrated Safeguards Data Sheet,Project Information Document,Integrated Safeguards Data Sheet,Project Information Document',\n",
- " u'envassesmentcategorycode': u'C',\n",
- " u'grantamt': 4700000,\n",
- " u'ibrdcommamt': 0,\n",
- " u'id': u'P144674',\n",
- " u'idacommamt': 0,\n",
- " u'impagency': u'MINISTRY OF FINANCE',\n",
- " u'lendinginstr': u'Specific Investment Loan',\n",
- " u'lendinginstrtype': u'IN',\n",
- " u'lendprojectcost': 5700000,\n",
- " u'majorsector_percent': [{u'Name': u'Public Administration, Law, and Justice',\n",
- " u'Percent': 70},\n",
- " {u'Name': u'Public Administration, Law, and Justice', u'Percent': 30}],\n",
- " u'mjsector_namecode': [{u'code': u'BX',\n",
- " u'name': u'Public Administration, Law, and Justice'},\n",
- " {u'code': u'BX', u'name': u'Public Administration, Law, and Justice'}],\n",
- " u'mjtheme': [u'Economic management',\n",
- " u'Social protection and risk management'],\n",
- " u'mjtheme_namecode': [{u'code': u'1', u'name': u'Economic management'},\n",
- " {u'code': u'6', u'name': u'Social protection and risk management'}],\n",
- " u'mjthemecode': u'1,6',\n",
- " u'prodline': u'RE',\n",
- " u'prodlinetext': u'Recipient Executed Activities',\n",
- " u'productlinetype': u'L',\n",
- " u'project_name': u'TN: DTF Social Protection Reforms Support',\n",
- " u'projectdocs': [{u'DocDate': u'29-MAR-2013',\n",
- " u'DocType': u'PID',\n",
- " u'DocTypeDesc': u'Project Information Document (PID), Vol.1 of 1',\n",
- " u'DocURL': u'http://www-wds.worldbank.org/servlet/WDSServlet?pcont=details&eid=000333037_20131024115616',\n",
- " u'EntityID': u'000333037_20131024115616'},\n",
- " {u'DocDate': u'29-MAR-2013',\n",
- " u'DocType': u'ISDS',\n",
- " u'DocTypeDesc': u'Integrated Safeguards Data Sheet (ISDS), Vol.1 of 1',\n",
- " u'DocURL': u'http://www-wds.worldbank.org/servlet/WDSServlet?pcont=details&eid=000356161_20131024151611',\n",
- " u'EntityID': u'000356161_20131024151611'},\n",
- " {u'DocDate': u'29-MAR-2013',\n",
- " u'DocType': u'ISDS',\n",
- " u'DocTypeDesc': u'Integrated Safeguards Data Sheet (ISDS), Vol.1 of 1',\n",
- " u'DocURL': u'http://www-wds.worldbank.org/servlet/WDSServlet?pcont=details&eid=000442464_20131031112136',\n",
- " u'EntityID': u'000442464_20131031112136'},\n",
- " {u'DocDate': u'29-MAR-2013',\n",
- " u'DocType': u'PID',\n",
- " u'DocTypeDesc': u'Project Information Document (PID), Vol.1 of 1',\n",
- " u'DocURL': u'http://www-wds.worldbank.org/servlet/WDSServlet?pcont=details&eid=000333037_20131031105716',\n",
- " u'EntityID': u'000333037_20131031105716'},\n",
- " {u'DocDate': u'16-JAN-2013',\n",
- " u'DocType': u'ISDS',\n",
- " u'DocTypeDesc': u'Integrated Safeguards Data Sheet (ISDS), Vol.1 of 1',\n",
- " u'DocURL': u'http://www-wds.worldbank.org/servlet/WDSServlet?pcont=details&eid=000356161_20130305113209',\n",
- " u'EntityID': u'000356161_20130305113209'},\n",
- " {u'DocDate': u'16-JAN-2013',\n",
- " u'DocType': u'PID',\n",
- " u'DocTypeDesc': u'Project Information Document (PID), Vol.1 of 1',\n",
- " u'DocURL': u'http://www-wds.worldbank.org/servlet/WDSServlet?pcont=details&eid=000356161_20130305113716',\n",
- " u'EntityID': u'000356161_20130305113716'}],\n",
- " u'projectfinancialtype': u'OTHER',\n",
- " u'projectstatusdisplay': u'Active',\n",
- " u'regionname': u'Middle East and North Africa',\n",
- " u'sector': [{u'Name': u'Public administration- Other social services'},\n",
- " {u'Name': u'General public administration sector'}],\n",
- " u'sector1': {u'Name': u'Public administration- Other social services',\n",
- " u'Percent': 70},\n",
- " u'sector2': {u'Name': u'General public administration sector',\n",
- " u'Percent': 30},\n",
- " u'sector_namecode': [{u'code': u'BS',\n",
- " u'name': u'Public administration- Other social services'},\n",
- " {u'code': u'BZ', u'name': u'General public administration sector'}],\n",
- " u'sectorcode': u'BZ,BS',\n",
- " u'source': u'IBRD',\n",
- " u'status': u'Active',\n",
- " u'supplementprojectflg': u'N',\n",
- " u'theme1': {u'Name': u'Other economic management', u'Percent': 30},\n",
- " u'theme_namecode': [{u'code': u'24', u'name': u'Other economic management'},\n",
- " {u'code': u'54', u'name': u'Social safety nets'}],\n",
- " u'themecode': u'54,24',\n",
- " u'totalamt': 0,\n",
- " u'totalcommamt': 4700000,\n",
- " u'url': u'http://www.worldbank.org/projects/P144674?lang=en'}]"
+ "[{'_id': {'$oid': '52b213b38594d8a2be17c780'},\n",
+ " 'approvalfy': 1999,\n",
+ " 'board_approval_month': 'November',\n",
+ " 'boardapprovaldate': '2013-11-12T00:00:00Z',\n",
+ " 'borrower': 'FEDERAL DEMOCRATIC REPUBLIC OF ETHIOPIA',\n",
+ " 'closingdate': '2018-07-07T00:00:00Z',\n",
+ " 'country_namecode': 'Federal Democratic Republic of Ethiopia!$!ET',\n",
+ " 'countrycode': 'ET',\n",
+ " 'countryname': 'Federal Democratic Republic of Ethiopia',\n",
+ " 'countryshortname': 'Ethiopia',\n",
+ " 'docty': 'Project Information Document,Indigenous Peoples Plan,Project Information Document',\n",
+ " 'envassesmentcategorycode': 'C',\n",
+ " 'grantamt': 0,\n",
+ " 'ibrdcommamt': 0,\n",
+ " 'id': 'P129828',\n",
+ " 'idacommamt': 130000000,\n",
+ " 'impagency': 'MINISTRY OF EDUCATION',\n",
+ " 'lendinginstr': 'Investment Project Financing',\n",
+ " 'lendinginstrtype': 'IN',\n",
+ " 'lendprojectcost': 550000000,\n",
+ " 'majorsector_percent': [{'Name': 'Education', 'Percent': 46},\n",
+ " {'Name': 'Education', 'Percent': 26},\n",
+ " {'Name': 'Public Administration, Law, and Justice', 'Percent': 16},\n",
+ " {'Name': 'Education', 'Percent': 12}],\n",
+ " 'mjsector_namecode': [{'name': 'Education', 'code': 'EX'},\n",
+ " {'name': 'Education', 'code': 'EX'},\n",
+ " {'name': 'Public Administration, Law, and Justice', 'code': 'BX'},\n",
+ " {'name': 'Education', 'code': 'EX'}],\n",
+ " 'mjtheme': ['Human development'],\n",
+ " 'mjtheme_namecode': [{'name': 'Human development', 'code': '8'},\n",
+ " {'name': '', 'code': '11'}],\n",
+ " 'mjthemecode': '8,11',\n",
+ " 'prodline': 'PE',\n",
+ " 'prodlinetext': 'IBRD/IDA',\n",
+ " 'productlinetype': 'L',\n",
+ " 'project_abstract': {'cdata': 'The development objective of the Second Phase of General Education Quality Improvement Project for Ethiopia is to improve learning conditions in primary and secondary schools and strengthen institutions at different levels of educational administration. The project has six components. The first component is curriculum, textbooks, assessment, examinations, and inspection. This component will support improvement of learning conditions in grades KG-12 by providing increased access to teaching and learning materials and through improvements to the curriculum by assessing the strengths and weaknesses of the current curriculum. This component has following four sub-components: (i) curriculum reform and implementation; (ii) teaching and learning materials; (iii) assessment and examinations; and (iv) inspection. The second component is teacher development program (TDP). This component will support improvements in learning conditions in both primary and secondary schools by advancing the quality of teaching in general education through: (a) enhancing the training of pre-service teachers in teacher education institutions; and (b) improving the quality of in-service teacher training. This component has following three sub-components: (i) pre-service teacher training; (ii) in-service teacher training; and (iii) licensing and relicensing of teachers and school leaders. The third component is school improvement plan. This component will support the strengthening of school planning in order to improve learning outcomes, and to partly fund the school improvement plans through school grants. It has following two sub-components: (i) school improvement plan; and (ii) school grants. The fourth component is management and capacity building, including education management information systems (EMIS). This component will support management and capacity building aspect of the project. 
This component has following three sub-components: (i) capacity building for education planning and management; (ii) capacity building for school planning and management; and (iii) EMIS. The fifth component is improving the quality of learning and teaching in secondary schools and universities through the use of information and communications technology (ICT). It has following five sub-components: (i) national policy and institution for ICT in general education; (ii) national ICT infrastructure improvement plan for general education; (iii) develop an integrated monitoring, evaluation, and learning system specifically for the ICT component; (iv) teacher professional development in the use of ICT; and (v) provision of limited number of e-Braille display readers with the possibility to scale up to all secondary education schools based on the successful implementation and usage of the readers. The sixth component is program coordination, monitoring and evaluation, and communication. It will support institutional strengthening by developing capacities in all aspects of program coordination, monitoring and evaluation; a new sub-component on communications will support information sharing for better management and accountability. It has following three sub-components: (i) program coordination; (ii) monitoring and evaluation (M and E); and (iii) communication.'},\n",
+ " 'project_name': 'Ethiopia General Education Quality Improvement Project II',\n",
+ " 'projectdocs': [{'DocTypeDesc': 'Project Information Document (PID), Vol.',\n",
+ " 'DocType': 'PID',\n",
+ " 'EntityID': '090224b081e545fb_1_0',\n",
+ " 'DocURL': 'http://www-wds.worldbank.org/servlet/WDSServlet?pcont=details&eid=090224b081e545fb_1_0',\n",
+ " 'DocDate': '28-AUG-2013'},\n",
+ " {'DocTypeDesc': 'Indigenous Peoples Plan (IP), Vol.1 of 1',\n",
+ " 'DocType': 'IP',\n",
+ " 'EntityID': '000442464_20130920111729',\n",
+ " 'DocURL': 'http://www-wds.worldbank.org/servlet/WDSServlet?pcont=details&eid=000442464_20130920111729',\n",
+ " 'DocDate': '01-JUL-2013'},\n",
+ " {'DocTypeDesc': 'Project Information Document (PID), Vol.',\n",
+ " 'DocType': 'PID',\n",
+ " 'EntityID': '090224b0817b19e2_1_0',\n",
+ " 'DocURL': 'http://www-wds.worldbank.org/servlet/WDSServlet?pcont=details&eid=090224b0817b19e2_1_0',\n",
+ " 'DocDate': '22-NOV-2012'}],\n",
+ " 'projectfinancialtype': 'IDA',\n",
+ " 'projectstatusdisplay': 'Active',\n",
+ " 'regionname': 'Africa',\n",
+ " 'sector': [{'Name': 'Primary education'},\n",
+ " {'Name': 'Secondary education'},\n",
+ " {'Name': 'Public administration- Other social services'},\n",
+ " {'Name': 'Tertiary education'}],\n",
+ " 'sector1': {'Name': 'Primary education', 'Percent': 46},\n",
+ " 'sector2': {'Name': 'Secondary education', 'Percent': 26},\n",
+ " 'sector3': {'Name': 'Public administration- Other social services',\n",
+ " 'Percent': 16},\n",
+ " 'sector4': {'Name': 'Tertiary education', 'Percent': 12},\n",
+ " 'sector_namecode': [{'name': 'Primary education', 'code': 'EP'},\n",
+ " {'name': 'Secondary education', 'code': 'ES'},\n",
+ " {'name': 'Public administration- Other social services', 'code': 'BS'},\n",
+ " {'name': 'Tertiary education', 'code': 'ET'}],\n",
+ " 'sectorcode': 'ET,BS,ES,EP',\n",
+ " 'source': 'IBRD',\n",
+ " 'status': 'Active',\n",
+ " 'supplementprojectflg': 'N',\n",
+ " 'theme1': {'Name': 'Education for all', 'Percent': 100},\n",
+ " 'theme_namecode': [{'name': 'Education for all', 'code': '65'}],\n",
+ " 'themecode': '65',\n",
+ " 'totalamt': 130000000,\n",
+ " 'totalcommamt': 130000000,\n",
+ " 'url': 'http://www.worldbank.org/projects/P129828/ethiopia-general-education-quality-improvement-project-ii?lang=en'},\n",
+ " {'_id': {'$oid': '52b213b38594d8a2be17c781'},\n",
+ " 'approvalfy': 2015,\n",
+ " 'board_approval_month': 'November',\n",
+ " 'boardapprovaldate': '2013-11-04T00:00:00Z',\n",
+ " 'borrower': 'GOVERNMENT OF TUNISIA',\n",
+ " 'country_namecode': 'Republic of Tunisia!$!TN',\n",
+ " 'countrycode': 'TN',\n",
+ " 'countryname': 'Republic of Tunisia',\n",
+ " 'countryshortname': 'Tunisia',\n",
+ " 'docty': 'Project Information Document,Integrated Safeguards Data Sheet,Integrated Safeguards Data Sheet,Project Information Document,Integrated Safeguards Data Sheet,Project Information Document',\n",
+ " 'envassesmentcategorycode': 'C',\n",
+ " 'grantamt': 4700000,\n",
+ " 'ibrdcommamt': 0,\n",
+ " 'id': 'P144674',\n",
+ " 'idacommamt': 0,\n",
+ " 'impagency': 'MINISTRY OF FINANCE',\n",
+ " 'lendinginstr': 'Specific Investment Loan',\n",
+ " 'lendinginstrtype': 'IN',\n",
+ " 'lendprojectcost': 5700000,\n",
+ " 'majorsector_percent': [{'Name': 'Public Administration, Law, and Justice',\n",
+ " 'Percent': 70},\n",
+ " {'Name': 'Public Administration, Law, and Justice', 'Percent': 30}],\n",
+ " 'mjsector_namecode': [{'name': 'Public Administration, Law, and Justice',\n",
+ " 'code': 'BX'},\n",
+ " {'name': 'Public Administration, Law, and Justice', 'code': 'BX'}],\n",
+ " 'mjtheme': ['Economic management', 'Social protection and risk management'],\n",
+ " 'mjtheme_namecode': [{'name': 'Economic management', 'code': '1'},\n",
+ " {'name': 'Social protection and risk management', 'code': '6'}],\n",
+ " 'mjthemecode': '1,6',\n",
+ " 'prodline': 'RE',\n",
+ " 'prodlinetext': 'Recipient Executed Activities',\n",
+ " 'productlinetype': 'L',\n",
+ " 'project_name': 'TN: DTF Social Protection Reforms Support',\n",
+ " 'projectdocs': [{'DocTypeDesc': 'Project Information Document (PID), Vol.1 of 1',\n",
+ " 'DocType': 'PID',\n",
+ " 'EntityID': '000333037_20131024115616',\n",
+ " 'DocURL': 'http://www-wds.worldbank.org/servlet/WDSServlet?pcont=details&eid=000333037_20131024115616',\n",
+ " 'DocDate': '29-MAR-2013'},\n",
+ " {'DocTypeDesc': 'Integrated Safeguards Data Sheet (ISDS), Vol.1 of 1',\n",
+ " 'DocType': 'ISDS',\n",
+ " 'EntityID': '000356161_20131024151611',\n",
+ " 'DocURL': 'http://www-wds.worldbank.org/servlet/WDSServlet?pcont=details&eid=000356161_20131024151611',\n",
+ " 'DocDate': '29-MAR-2013'},\n",
+ " {'DocTypeDesc': 'Integrated Safeguards Data Sheet (ISDS), Vol.1 of 1',\n",
+ " 'DocType': 'ISDS',\n",
+ " 'EntityID': '000442464_20131031112136',\n",
+ " 'DocURL': 'http://www-wds.worldbank.org/servlet/WDSServlet?pcont=details&eid=000442464_20131031112136',\n",
+ " 'DocDate': '29-MAR-2013'},\n",
+ " {'DocTypeDesc': 'Project Information Document (PID), Vol.1 of 1',\n",
+ " 'DocType': 'PID',\n",
+ " 'EntityID': '000333037_20131031105716',\n",
+ " 'DocURL': 'http://www-wds.worldbank.org/servlet/WDSServlet?pcont=details&eid=000333037_20131031105716',\n",
+ " 'DocDate': '29-MAR-2013'},\n",
+ " {'DocTypeDesc': 'Integrated Safeguards Data Sheet (ISDS), Vol.1 of 1',\n",
+ " 'DocType': 'ISDS',\n",
+ " 'EntityID': '000356161_20130305113209',\n",
+ " 'DocURL': 'http://www-wds.worldbank.org/servlet/WDSServlet?pcont=details&eid=000356161_20130305113209',\n",
+ " 'DocDate': '16-JAN-2013'},\n",
+ " {'DocTypeDesc': 'Project Information Document (PID), Vol.1 of 1',\n",
+ " 'DocType': 'PID',\n",
+ " 'EntityID': '000356161_20130305113716',\n",
+ " 'DocURL': 'http://www-wds.worldbank.org/servlet/WDSServlet?pcont=details&eid=000356161_20130305113716',\n",
+ " 'DocDate': '16-JAN-2013'}],\n",
+ " 'projectfinancialtype': 'OTHER',\n",
+ " 'projectstatusdisplay': 'Active',\n",
+ " 'regionname': 'Middle East and North Africa',\n",
+ " 'sector': [{'Name': 'Public administration- Other social services'},\n",
+ " {'Name': 'General public administration sector'}],\n",
+ " 'sector1': {'Name': 'Public administration- Other social services',\n",
+ " 'Percent': 70},\n",
+ " 'sector2': {'Name': 'General public administration sector', 'Percent': 30},\n",
+ " 'sector_namecode': [{'name': 'Public administration- Other social services',\n",
+ " 'code': 'BS'},\n",
+ " {'name': 'General public administration sector', 'code': 'BZ'}],\n",
+ " 'sectorcode': 'BZ,BS',\n",
+ " 'source': 'IBRD',\n",
+ " 'status': 'Active',\n",
+ " 'supplementprojectflg': 'N',\n",
+ " 'theme1': {'Name': 'Other economic management', 'Percent': 30},\n",
+ " 'theme_namecode': [{'name': 'Other economic management', 'code': '24'},\n",
+ " {'name': 'Social safety nets', 'code': '54'}],\n",
+ " 'themecode': '54,24',\n",
+ " 'totalamt': 0,\n",
+ " 'totalcommamt': 4700000,\n",
+ " 'url': 'http://www.worldbank.org/projects/P144674?lang=en'}]"
]
},
- "execution_count": 9,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -432,15 +444,26 @@
},
{
"cell_type": "code",
- "execution_count": 10,
- "metadata": {
- "collapsed": false
- },
+ "execution_count": 12,
+ "metadata": {},
"outputs": [
{
"data": {
"text/html": [
- "
\n",
+ "
\n",
+ "\n",
"
\n",
" \n",
" \n",
@@ -471,7 +494,7 @@
"
\n",
" \n",
" | 0 | \n",
- " {u'$oid': u'52b213b38594d8a2be17c780'} | \n",
+ " {'$oid': '52b213b38594d8a2be17c780'} | \n",
" 1999 | \n",
" November | \n",
" 2013-11-12T00:00:00Z | \n",
@@ -486,8 +509,8 @@
" IBRD | \n",
" Active | \n",
" N | \n",
- " {u'Percent': 100, u'Name': u'Education for all'} | \n",
- " [{u'code': u'65', u'name': u'Education for all'}] | \n",
+ " {'Name': 'Education for all', 'Percent': 100} | \n",
+ " [{'name': 'Education for all', 'code': '65'}] | \n",
" 65 | \n",
" 130000000 | \n",
" 130000000 | \n",
@@ -495,7 +518,7 @@
"
\n",
" \n",
" | 1 | \n",
- " {u'$oid': u'52b213b38594d8a2be17c781'} | \n",
+ " {'$oid': '52b213b38594d8a2be17c781'} | \n",
" 2015 | \n",
" November | \n",
" 2013-11-04T00:00:00Z | \n",
@@ -510,8 +533,8 @@
" IBRD | \n",
" Active | \n",
" N | \n",
- " {u'Percent': 30, u'Name': u'Other economic man... | \n",
- " [{u'code': u'24', u'name': u'Other economic ma... | \n",
+ " {'Name': 'Other economic management', 'Percent... | \n",
+ " [{'name': 'Other economic management', 'code':... | \n",
" 54,24 | \n",
" 0 | \n",
" 4700000 | \n",
@@ -523,9 +546,9 @@
""
],
"text/plain": [
- " _id approvalfy board_approval_month \\\n",
- "0 {u'$oid': u'52b213b38594d8a2be17c780'} 1999 November \n",
- "1 {u'$oid': u'52b213b38594d8a2be17c781'} 2015 November \n",
+ " _id approvalfy board_approval_month \\\n",
+ "0 {'$oid': '52b213b38594d8a2be17c780'} 1999 November \n",
+ "1 {'$oid': '52b213b38594d8a2be17c781'} 2015 November \n",
"\n",
" boardapprovaldate borrower \\\n",
"0 2013-11-12T00:00:00Z FEDERAL DEMOCRATIC REPUBLIC OF ETHIOPIA \n",
@@ -535,25 +558,21 @@
"0 2018-07-07T00:00:00Z Federal Democratic Republic of Ethiopia!$!ET \n",
"1 NaN Republic of Tunisia!$!TN \n",
"\n",
- " countrycode countryname countryshortname \\\n",
- "0 ET Federal Democratic Republic of Ethiopia Ethiopia \n",
- "1 TN Republic of Tunisia Tunisia \n",
+ " countrycode countryname countryshortname ... \\\n",
+ "0 ET Federal Democratic Republic of Ethiopia Ethiopia ... \n",
+ "1 TN Republic of Tunisia Tunisia ... \n",
"\n",
- " ... sectorcode source \\\n",
- "0 ... ET,BS,ES,EP IBRD \n",
- "1 ... BZ,BS IBRD \n",
- "\n",
- " status supplementprojectflg \\\n",
- "0 Active N \n",
- "1 Active N \n",
+ " sectorcode source status supplementprojectflg \\\n",
+ "0 ET,BS,ES,EP IBRD Active N \n",
+ "1 BZ,BS IBRD Active N \n",
"\n",
" theme1 \\\n",
- "0 {u'Percent': 100, u'Name': u'Education for all'} \n",
- "1 {u'Percent': 30, u'Name': u'Other economic man... \n",
+ "0 {'Name': 'Education for all', 'Percent': 100} \n",
+ "1 {'Name': 'Other economic management', 'Percent... \n",
"\n",
" theme_namecode themecode totalamt \\\n",
- "0 [{u'code': u'65', u'name': u'Education for all'}] 65 130000000 \n",
- "1 [{u'code': u'24', u'name': u'Other economic ma... 54,24 0 \n",
+ "0 [{'name': 'Education for all', 'code': '65'}] 65 130000000 \n",
+ "1 [{'name': 'Other economic management', 'code':... 54,24 0 \n",
"\n",
" totalcommamt url \n",
"0 130000000 http://www.worldbank.org/projects/P129828/ethi... \n",
@@ -562,7 +581,7 @@
"[2 rows x 50 columns]"
]
},
- "execution_count": 10,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -586,35 +605,662 @@
"3. In 2. above you will notice that some entries have only the code and the name is missing. Create a dataframe with the missing names filled in."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[{'code': '8', 'name': 'Human development'}, {'code': '11', 'name': ''}]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import json\n",
+ "\n",
+ "# Load the full World Bank projects file into a DataFrame. `pd` (pandas) is\n",
+ "# expected to be imported by an earlier cell of this notebook.\n",
+ "# Note: call pd.json_normalize where flattening is needed; importing\n",
+ "# json_normalize from pandas.io.json is deprecated since pandas 1.0.\n",
+ "df = pd.read_json('data/world_bank_projects.json')\n",
+ "\n",
+ "# Show the major-theme entries of the first project: a list of\n",
+ "# {'code', 'name'} dicts in which 'name' can be an empty string.\n",
+ "df.mjtheme_namecode[0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sector | \n",
+ " supplementprojectflg | \n",
+ " projectfinancialtype | \n",
+ " prodline | \n",
+ " mjtheme | \n",
+ " idacommamt | \n",
+ " impagency | \n",
+ " project_name | \n",
+ " mjthemecode | \n",
+ " closingdate | \n",
+ " ... | \n",
+ " sector3 | \n",
+ " majorsector_percent | \n",
+ " board_approval_month | \n",
+ " theme_namecode | \n",
+ " url | \n",
+ " source | \n",
+ " projectstatusdisplay | \n",
+ " ibrdcommamt | \n",
+ " sector_namecode | \n",
+ " _id | \n",
+ "
\n",
+ " \n",
+ " | countryname | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | People's Republic of China | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 17 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 16 | \n",
+ " ... | \n",
+ " 11 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 17 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ "
\n",
+ " \n",
+ " | Republic of Indonesia | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 15 | \n",
+ " ... | \n",
+ " 10 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ " 19 | \n",
+ "
\n",
+ " \n",
+ " | Socialist Republic of Vietnam | \n",
+ " 17 | \n",
+ " 16 | \n",
+ " 17 | \n",
+ " 17 | \n",
+ " 17 | \n",
+ " 17 | \n",
+ " 17 | \n",
+ " 17 | \n",
+ " 17 | \n",
+ " 14 | \n",
+ " ... | \n",
+ " 10 | \n",
+ " 17 | \n",
+ " 17 | \n",
+ " 17 | \n",
+ " 17 | \n",
+ " 17 | \n",
+ " 17 | \n",
+ " 17 | \n",
+ " 17 | \n",
+ " 17 | \n",
+ "
\n",
+ " \n",
+ " | Republic of India | \n",
+ " 16 | \n",
+ " 16 | \n",
+ " 16 | \n",
+ " 16 | \n",
+ " 16 | \n",
+ " 16 | \n",
+ " 15 | \n",
+ " 16 | \n",
+ " 16 | \n",
+ " 13 | \n",
+ " ... | \n",
+ " 9 | \n",
+ " 16 | \n",
+ " 16 | \n",
+ " 16 | \n",
+ " 16 | \n",
+ " 16 | \n",
+ " 16 | \n",
+ " 16 | \n",
+ " 16 | \n",
+ " 16 | \n",
+ "
\n",
+ " \n",
+ " | Republic of Yemen | \n",
+ " 13 | \n",
+ " 13 | \n",
+ " 13 | \n",
+ " 13 | \n",
+ " 13 | \n",
+ " 13 | \n",
+ " 13 | \n",
+ " 13 | \n",
+ " 13 | \n",
+ " 8 | \n",
+ " ... | \n",
+ " 4 | \n",
+ " 13 | \n",
+ " 13 | \n",
+ " 13 | \n",
+ " 13 | \n",
+ " 13 | \n",
+ " 13 | \n",
+ " 13 | \n",
+ " 13 | \n",
+ " 13 | \n",
+ "
\n",
+ " \n",
+ " | People's Republic of Bangladesh | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 10 | \n",
+ " ... | \n",
+ " 6 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ "
\n",
+ " \n",
+ " | Nepal | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 11 | \n",
+ " 12 | \n",
+ " 11 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 7 | \n",
+ " ... | \n",
+ " 6 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 11 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ "
\n",
+ " \n",
+ " | Kingdom of Morocco | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 11 | \n",
+ " 12 | \n",
+ " 11 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 11 | \n",
+ " ... | \n",
+ " 5 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 11 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ " 12 | \n",
+ "
\n",
+ " \n",
+ " | Republic of Mozambique | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 9 | \n",
+ " ... | \n",
+ " 6 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ "
\n",
+ " \n",
+ " | Africa | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 8 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 7 | \n",
+ " ... | \n",
+ " 7 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ " 11 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
10 rows × 49 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sector supplementprojectflg \\\n",
+ "countryname \n",
+ "People's Republic of China 19 19 \n",
+ "Republic of Indonesia 19 19 \n",
+ "Socialist Republic of Vietnam 17 16 \n",
+ "Republic of India 16 16 \n",
+ "Republic of Yemen 13 13 \n",
+ "People's Republic of Bangladesh 12 12 \n",
+ "Nepal 12 12 \n",
+ "Kingdom of Morocco 12 12 \n",
+ "Republic of Mozambique 11 11 \n",
+ "Africa 11 11 \n",
+ "\n",
+ " projectfinancialtype prodline mjtheme \\\n",
+ "countryname \n",
+ "People's Republic of China 19 19 17 \n",
+ "Republic of Indonesia 19 19 19 \n",
+ "Socialist Republic of Vietnam 17 17 17 \n",
+ "Republic of India 16 16 16 \n",
+ "Republic of Yemen 13 13 13 \n",
+ "People's Republic of Bangladesh 12 12 12 \n",
+ "Nepal 12 12 11 \n",
+ "Kingdom of Morocco 12 12 11 \n",
+ "Republic of Mozambique 11 11 11 \n",
+ "Africa 11 11 11 \n",
+ "\n",
+ " idacommamt impagency project_name \\\n",
+ "countryname \n",
+ "People's Republic of China 19 19 19 \n",
+ "Republic of Indonesia 19 19 19 \n",
+ "Socialist Republic of Vietnam 17 17 17 \n",
+ "Republic of India 16 15 16 \n",
+ "Republic of Yemen 13 13 13 \n",
+ "People's Republic of Bangladesh 12 12 12 \n",
+ "Nepal 12 11 12 \n",
+ "Kingdom of Morocco 12 11 12 \n",
+ "Republic of Mozambique 11 11 11 \n",
+ "Africa 11 8 11 \n",
+ "\n",
+ " mjthemecode closingdate ... sector3 \\\n",
+ "countryname ... \n",
+ "People's Republic of China 19 16 ... 11 \n",
+ "Republic of Indonesia 19 15 ... 10 \n",
+ "Socialist Republic of Vietnam 17 14 ... 10 \n",
+ "Republic of India 16 13 ... 9 \n",
+ "Republic of Yemen 13 8 ... 4 \n",
+ "People's Republic of Bangladesh 12 10 ... 6 \n",
+ "Nepal 12 7 ... 6 \n",
+ "Kingdom of Morocco 12 11 ... 5 \n",
+ "Republic of Mozambique 11 9 ... 6 \n",
+ "Africa 11 7 ... 7 \n",
+ "\n",
+ " majorsector_percent board_approval_month \\\n",
+ "countryname \n",
+ "People's Republic of China 19 19 \n",
+ "Republic of Indonesia 19 19 \n",
+ "Socialist Republic of Vietnam 17 17 \n",
+ "Republic of India 16 16 \n",
+ "Republic of Yemen 13 13 \n",
+ "People's Republic of Bangladesh 12 12 \n",
+ "Nepal 12 12 \n",
+ "Kingdom of Morocco 12 12 \n",
+ "Republic of Mozambique 11 11 \n",
+ "Africa 11 11 \n",
+ "\n",
+ " theme_namecode url source \\\n",
+ "countryname \n",
+ "People's Republic of China 17 19 19 \n",
+ "Republic of Indonesia 19 19 19 \n",
+ "Socialist Republic of Vietnam 17 17 17 \n",
+ "Republic of India 16 16 16 \n",
+ "Republic of Yemen 13 13 13 \n",
+ "People's Republic of Bangladesh 12 12 12 \n",
+ "Nepal 11 12 12 \n",
+ "Kingdom of Morocco 11 12 12 \n",
+ "Republic of Mozambique 11 11 11 \n",
+ "Africa 11 11 11 \n",
+ "\n",
+ " projectstatusdisplay ibrdcommamt \\\n",
+ "countryname \n",
+ "People's Republic of China 19 19 \n",
+ "Republic of Indonesia 19 19 \n",
+ "Socialist Republic of Vietnam 17 17 \n",
+ "Republic of India 16 16 \n",
+ "Republic of Yemen 13 13 \n",
+ "People's Republic of Bangladesh 12 12 \n",
+ "Nepal 12 12 \n",
+ "Kingdom of Morocco 12 12 \n",
+ "Republic of Mozambique 11 11 \n",
+ "Africa 11 11 \n",
+ "\n",
+ " sector_namecode _id \n",
+ "countryname \n",
+ "People's Republic of China 19 19 \n",
+ "Republic of Indonesia 19 19 \n",
+ "Socialist Republic of Vietnam 17 17 \n",
+ "Republic of India 16 16 \n",
+ "Republic of Yemen 13 13 \n",
+ "People's Republic of Bangladesh 12 12 \n",
+ "Nepal 12 12 \n",
+ "Kingdom of Morocco 12 12 \n",
+ "Republic of Mozambique 11 11 \n",
+ "Africa 11 11 \n",
+ "\n",
+ "[10 rows x 49 columns]"
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Find the 10 countries with the most projects.\n",
+ "# Each row of df is one project, so counting rows per 'countryname' gives the\n",
+ "# project count directly (no dependence on another column being non-null);\n",
+ "# value_counts() returns the counts already sorted in descending order.\n",
+ "top_countries = df['countryname'].value_counts().head(10).rename('project_count')\n",
+ "top_countries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'1': 'Economic management', '2': 'Public sector governance', '3': 'Rule of law', '4': 'Financial and private sector development', '5': 'Trade and integration', '6': 'Social protection and risk management', '7': 'Social dev/gender/inclusion', '8': 'Human development', '9': 'Urban development', '10': 'Rural development', '11': 'Environment and natural resources management'}\n",
+ " 0 \\\n",
+ "0 {'code': '8', 'name': 'Human development'} \n",
+ "1 {'code': '1', 'name': 'Economic management'} \n",
+ "2 {'code': '5', 'name': 'Trade and integration'} \n",
+ "3 {'code': '7', 'name': 'Social dev/gender/inclu... \n",
+ "4 {'code': '5', 'name': 'Trade and integration'} \n",
+ ".. ... \n",
+ "495 {'code': '4', 'name': 'Financial and private s... \n",
+ "496 {'code': '8', 'name': 'Human development'} \n",
+ "497 {'code': '10', 'name': 'Rural development'} \n",
+ "498 {'code': '10', 'name': 'Rural development'} \n",
+ "499 {'code': '9', 'name': 'Urban development'} \n",
+ "\n",
+ " 1 \\\n",
+ "0 {'code': '11', 'name': ''} \n",
+ "1 {'code': '6', 'name': 'Social protection and r... \n",
+ "2 {'code': '2', 'name': 'Public sector governance'} \n",
+ "3 {'code': '7', 'name': 'Social dev/gender/inclu... \n",
+ "4 {'code': '4', 'name': 'Financial and private s... \n",
+ ".. ... \n",
+ "495 {'code': '7', 'name': 'Social dev/gender/inclu... \n",
+ "496 {'code': '5', 'name': 'Trade and integration'} \n",
+ "497 {'code': '6', 'name': ''} \n",
+ "498 {'code': '10', 'name': 'Rural development'} \n",
+ "499 {'code': '8', 'name': 'Human development'} \n",
+ "\n",
+ " 2 \\\n",
+ "0 None \n",
+ "1 None \n",
+ "2 {'code': '11', 'name': 'Environment and natura... \n",
+ "3 None \n",
+ "4 None \n",
+ ".. ... \n",
+ "495 None \n",
+ "496 {'code': '2', 'name': 'Public sector governance'} \n",
+ "497 None \n",
+ "498 {'code': '10', 'name': 'Rural development'} \n",
+ "499 {'code': '5', 'name': 'Trade and integration'} \n",
+ "\n",
+ " 3 4 code_0 \\\n",
+ "0 None None 8 \n",
+ "1 None None 1 \n",
+ "2 {'code': '6', 'name': 'Social protection and r... None 5 \n",
+ "3 None None 7 \n",
+ "4 None None 5 \n",
+ ".. ... ... ... \n",
+ "495 None None 4 \n",
+ "496 {'code': '8', 'name': 'Human development'} None 8 \n",
+ "497 None None 10 \n",
+ "498 None None 10 \n",
+ "499 {'code': '4', 'name': 'Financial and private s... None 9 \n",
+ "\n",
+ " name_0 code_1 \\\n",
+ "0 Human development 11 \n",
+ "1 Economic management 6 \n",
+ "2 Trade and integration 2 \n",
+ "3 Social dev/gender/inclusion 7 \n",
+ "4 Trade and integration 4 \n",
+ ".. ... ... \n",
+ "495 Financial and private sector development 7 \n",
+ "496 Human development 5 \n",
+ "497 Rural development 6 \n",
+ "498 Rural development 10 \n",
+ "499 Urban development 8 \n",
+ "\n",
+ " name_1 code_2 \\\n",
+ "0 Environment and natural resources management NaN \n",
+ "1 Social protection and risk management NaN \n",
+ "2 Public sector governance 11 \n",
+ "3 Social dev/gender/inclusion NaN \n",
+ "4 Financial and private sector development NaN \n",
+ ".. ... ... \n",
+ "495 Social dev/gender/inclusion NaN \n",
+ "496 Trade and integration 2 \n",
+ "497 Social protection and risk management NaN \n",
+ "498 Rural development 10 \n",
+ "499 Human development 5 \n",
+ "\n",
+ " name_2 code_3 \\\n",
+ "0 NaN NaN \n",
+ "1 NaN NaN \n",
+ "2 Environment and natural resources management 6 \n",
+ "3 NaN NaN \n",
+ "4 NaN NaN \n",
+ ".. ... ... \n",
+ "495 NaN NaN \n",
+ "496 Public sector governance 8 \n",
+ "497 NaN NaN \n",
+ "498 Rural development NaN \n",
+ "499 Trade and integration 4 \n",
+ "\n",
+ " name_3 code_4 name_4 \n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 Social protection and risk management NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ ".. ... ... ... \n",
+ "495 NaN NaN NaN \n",
+ "496 Human development NaN NaN \n",
+ "497 NaN NaN NaN \n",
+ "498 NaN NaN NaN \n",
+ "499 Financial and private sector development NaN NaN \n",
+ "\n",
+ "[500 rows x 15 columns]\n",
+ "{'1': 38, '2': 199, '3': 15, '4': 146, '5': 77, '6': 168, '7': 130, '8': 210, '9': 50, '10': 216, '11': 250}\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Find the top 10 major project themes (column 'mjtheme_namecode') and build a\n",
+ "# DataFrame in which every missing theme name is filled in from its code.\n",
+ "\n",
+ "# Flatten to one row per (project, theme). Entries that are not lists (e.g. a\n",
+ "# missing value) contribute no rows, and any number of themes per project is\n",
+ "# supported.\n",
+ "theme_rows = [\n",
+ "    {'project': project, 'code': theme['code'], 'name': theme.get('name', '')}\n",
+ "    for project, theme_list in df['mjtheme_namecode'].items()\n",
+ "    if isinstance(theme_list, list)\n",
+ "    for theme in theme_list\n",
+ "]\n",
+ "themes = pd.DataFrame(theme_rows, columns=['project', 'code', 'name'])\n",
+ "\n",
+ "# Learn the code -> name mapping from every entry that does carry a name, in\n",
+ "# any position of a project's list (not only its first theme).\n",
+ "has_name = themes['name'].notna() & themes['name'].ne('')\n",
+ "code_to_name = (\n",
+ "    themes.loc[has_name]\n",
+ "    .drop_duplicates('code')\n",
+ "    .set_index('code')['name']\n",
+ "    .to_dict()\n",
+ ")\n",
+ "\n",
+ "# Fill the blanks on a new frame so `themes` keeps the raw values and the cell\n",
+ "# gives the same result when it is re-run.\n",
+ "themes_filled = themes.assign(\n",
+ "    name=themes['name'].where(has_name, themes['code'].map(code_to_name))\n",
+ ")\n",
+ "assert themes_filled['name'].notna().all(), 'some theme codes never appear with a name'\n",
+ "display(themes_filled.head(10))\n",
+ "\n",
+ "# A project may list several themes (even the same one more than once); every\n",
+ "# listed occurrence is counted, then the 10 most frequent themes are kept.\n",
+ "top_themes = themes_filled['name'].value_counts().head(10).rename('occurrences')\n",
+ "top_themes"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
- "metadata": {
- "collapsed": true
- },
+ "metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
- "display_name": "Python 2",
+ "display_name": "Python 3 (ipykernel)",
"language": "python",
- "name": "python2"
+ "name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
- "version": 2
+ "version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.9"
+ "pygments_lexer": "ipython3",
+ "version": "3.9.12"
}
},
"nbformat": 4,
- "nbformat_minor": 0
+ "nbformat_minor": 1
}