pandas.io.json.json_normalize con json muy anidado

He estado tratando denormalize un archivo json muy anidado que luego analizaré. Con lo que estoy luchando es cómo ir más allá de un nivel de profundidad para normalizar.

Pasé por elpandas.io.json.json_normalize documentación, ya que hace exactamente lo que quiero que haga.

He podido normalizar parte de él y ahora entiendo cómo funcionan los diccionarios, pero todavía no estoy allí.

Con el siguiente código solo puedo obtener el primer nivel.

import json
import pandas as pd
from pandas.io.json import json_normalize

with open('authors_sample.json') as f:
    d = json.load(f)

raw = json_normalize(d['hits']['hits'])

authors = json_normalize(data = d['hits']['hits'], 
                         record_path = '_source', 
                         meta = ['_id', ['_source', 'journal'], ['_source', 'title'], 
                                 ['_source', 'normalized_venue_name']
                                 ])

Estoy tratando de "cavar" en el diccionario de "autores" con el siguiente código, pero elrecord_path = ['_source', 'authors'] me tiraTypeError: string indices must be integers. Hasta donde yo entiendojson_normalize la lógica debería ser buena, pero todavía no entiendo cómo sumergirse en un json condict vslist.

Incluso pasé por esto simpleejemplo.

authors = json_normalize(data = d['hits']['hits'], 
                         record_path = ['_source', 'authors'], 
                         meta = ['_id', ['_source', 'journal'], ['_source', 'title'], 
                                 ['_source', 'normalized_venue_name']
                                 ])

A continuación se muestra un fragmento del archivo json (5 registros).

{u'_shards': {u'failed': 0, u'successful': 5, u'total': 5},
 u'hits': {u'hits': [{u'_id': u'7CB3F2AD',
    u'_index': u'scibase_listings',
    u'_score': 1.0,
    u'_source': {u'authors': None,
     u'deleted': 0,
     u'description': None,
     u'doi': u'',
     u'is_valid': 1,
     u'issue': None,
     u'journal': u'Physical Review Letters',
     u'link': None,
     u'meta_description': None,
     u'meta_keywords': None,
     u'normalized_venue_name': u'phys rev lett',
     u'pages': None,
     u'parent_keywords': [u'Chromatography',
      u'Quantum mechanics',
      u'Particle physics',
      u'Quantum field theory',
      u'Analytical chemistry',
      u'Quantum chromodynamics',
      u'Physics',
      u'Mass spectrometry',
      u'Chemistry'],
     u'pub_date': u'1987-03-02 00:00:00',
     u'pubtype': None,
     u'rating_avg_weighted': 0,
     u'rating_clarity': 0.0,
     u'rating_clarity_weighted': 0.0,
     u'rating_innovation': 0.0,
     u'rating_innovation_weighted': 0.0,
     u'rating_num_weighted': 0,
     u'rating_reproducability': 0,
     u'rating_reproducibility_weighted': 0.0,
     u'rating_versatility': 0.0,
     u'rating_versatility_weighted': 0.0,
     u'review_count': 0,
     u'tag': [u'mass spectra', u'elementary particles', u'bound states'],
     u'title': u'Evidence for a new meson: A quasinuclear NN-bar bound state',
     u'userAvg': 0.0,
     u'user_id': None,
     u'venue_name': u'Physical Review Letters',
     u'views_count': 0,
     u'volume': None},
    u'_type': u'listing'},
   {u'_id': u'7AF8EBC3',
    u'_index': u'scibase_listings',
    u'_score': 1.0,
    u'_source': {u'authors': [{u'affiliations': [u'Punjabi University'],
       u'author_id': u'780E3459',
       u'author_name': u'munish puri'},
      {u'affiliations': [u'Punjabi University'],
       u'author_id': u'48D92C79',
       u'author_name': u'rajesh dhaliwal'},
      {u'affiliations': [u'Punjabi University'],
       u'author_id': u'7D9BD37C',
       u'author_name': u'r s singh'}],
     u'deleted': 0,
     u'description': None,
     u'doi': u'',
     u'is_valid': 1,
     u'issue': None,
     u'journal': u'Journal of Industrial Microbiology & Biotechnology',
     u'link': None,
     u'meta_description': None,
     u'meta_keywords': None,
     u'normalized_venue_name': u'j ind microbiol biotechnol',
     u'pages': None,
     u'parent_keywords': [u'Nuclear medicine',
      u'Psychology',
      u'Hydrology',
      u'Chromatography',
      u'X-ray crystallography',
      u'Nuclear fusion',
      u'Medicine',
      u'Fluid dynamics',
      u'Thermodynamics',
      u'Physics',
      u'Gas chromatography',
      u'Radiobiology',
      u'Engineering',
      u'Organic chemistry',
      u'High-performance liquid chromatography',
      u'Chemistry',
      u'Organic synthesis',
      u'Psychotherapist'],
     u'pub_date': u'2008-04-04 00:00:00',
     u'pubtype': None,
     u'rating_avg_weighted': 0,
     u'rating_clarity': 0.0,
     u'rating_clarity_weighted': 0.0,
     u'rating_innovation': 0.0,
     u'rating_innovation_weighted': 0.0,
     u'rating_num_weighted': 0,
     u'rating_reproducability': 0,
     u'rating_reproducibility_weighted': 0.0,
     u'rating_versatility': 0.0,
     u'rating_versatility_weighted': 0.0,
     u'review_count': 0,
     u'tag': [u'flow rate',
      u'operant conditioning',
      u'packed bed reactor',
      u'immobilized enzyme',
      u'specific activity'],
     u'title': u'Development of a stable continuous flow immobilized enzyme reactor for the hydrolysis of inulin',
     u'userAvg': 0.0,
     u'user_id': None,
     u'venue_name': u'Journal of Industrial Microbiology & Biotechnology',
     u'views_count': 0,
     u'volume': None},
    u'_type': u'listing'},
   {u'_id': u'7521A721',
    u'_index': u'scibase_listings',
    u'_score': 1.0,
    u'_source': {u'authors': [{u'author_id': u'7FF872BC',
       u'author_name': u'barbara eileen ryan'}],
     u'deleted': 0,
     u'description': None,
     u'doi': u'',
     u'is_valid': 1,
     u'issue': None,
     u'journal': u'The American Historical Review',
     u'link': None,
     u'meta_description': None,
     u'meta_keywords': None,
     u'normalized_venue_name': u'american historical review',
     u'pages': None,
     u'parent_keywords': [u'Social science',
      u'Politics',
      u'Sociology',
      u'Law'],
     u'pub_date': u'1992-01-01 00:00:00',
     u'pubtype': None,
     u'rating_avg_weighted': 0,
     u'rating_clarity': 0.0,
     u'rating_clarity_weighted': 0.0,
     u'rating_innovation': 0.0,
     u'rating_innovation_weighted': 0.0,
     u'rating_num_weighted': 0,
     u'rating_reproducability': 0,
     u'rating_reproducibility_weighted': 0.0,
     u'rating_versatility': 0.0,
     u'rating_versatility_weighted': 0.0,
     u'review_count': 0,
     u'tag': [u'social movements'],
     u'title': u"Feminism and the women's movement : dynamics of change in social movement ideology, and activism",
     u'userAvg': 0.0,
     u'user_id': None,
     u'venue_name': u'The American Historical Review',
     u'views_count': 0,
     u'volume': None},
    u'_type': u'listing'},
   {u'_id': u'7DAEB9A4',
    u'_index': u'scibase_listings',
    u'_score': 1.0,
    u'_source': {u'authors': [{u'author_id': u'0299B8E9',
       u'author_name': u'fraser j harbutt'}],
     u'deleted': 0,
     u'description': None,
     u'doi': u'',
     u'is_valid': 1,
     u'issue': None,
     u'journal': u'The American Historical Review',
     u'link': None,
     u'meta_description': None,
     u'meta_keywords': None,
     u'normalized_venue_name': u'american historical review',
     u'pages': None,
     u'parent_keywords': [u'Superconductivity',
      u'Nuclear fusion',
      u'Geology',
      u'Chemistry',
      u'Metallurgy'],
     u'pub_date': u'1988-01-01 00:00:00',
     u'pubtype': None,
     u'rating_avg_weighted': 0,
     u'rating_clarity': 0.0,
     u'rating_clarity_weighted': 0.0,
     u'rating_innovation': 0.0,
     u'rating_innovation_weighted': 0.0,
     u'rating_num_weighted': 0,
     u'rating_reproducability': 0,
     u'rating_reproducibility_weighted': 0.0,
     u'rating_versatility': 0.0,
     u'rating_versatility_weighted': 0.0,
     u'review_count': 0,
     u'tag': [u'iron'],
     u'title': u'The iron curtain : Churchill, America, and the origins of the Cold War',
     u'userAvg': 0.0,
     u'user_id': None,
     u'venue_name': u'The American Historical Review',
     u'views_count': 0,
     u'volume': None},
    u'_type': u'listing'},
   {u'_id': u'7B3236C5',
    u'_index': u'scibase_listings',
    u'_score': 1.0,
    u'_source': {u'authors': [{u'author_id': u'7DAB7B72',
       u'author_name': u'richard m freeland'}],
     u'deleted': 0,
     u'description': None,
     u'doi': u'',
     u'is_valid': 1,
     u'issue': None,
     u'journal': u'The American Historical Review',
     u'link': None,
     u'meta_description': None,
     u'meta_keywords': None,
     u'normalized_venue_name': u'american historical review',
     u'pages': None,
     u'parent_keywords': [u'Political Science', u'Economics'],
     u'pub_date': u'1985-01-01 00:00:00',
     u'pubtype': None,
     u'rating_avg_weighted': 0,
     u'rating_clarity': 0.0,
     u'rating_clarity_weighted': 0.0,
     u'rating_innovation': 0.0,
     u'rating_innovation_weighted': 0.0,
     u'rating_num_weighted': 0,
     u'rating_reproducability': 0,
     u'rating_reproducibility_weighted': 0.0,
     u'rating_versatility': 0.0,
     u'rating_versatility_weighted': 0.0,
     u'review_count': 0,
     u'tag': [u'foreign policy'],
     u'title': u'The Truman Doctrine and the origins of McCarthyism : foreign policy, domestic politics, and internal security, 1946-1948',
     u'userAvg': 0.0,
     u'user_id': None,
     u'venue_name': u'The American Historical Review',
     u'views_count': 0,
     u'volume': None},
    u'_type': u'listing'}],
  u'max_score': 1.0,
  u'total': 36429433},
 u'timed_out': False,
 u'took': 170}

Respuestas a la pregunta(2)

Su respuesta a la pregunta