https://github.com/amirziai/flatten

алсяnormalize очень вложенный файл JSON, который я позже проанализирую. То, с чем я борюсь, это как пройти глубину более одного уровня, чтобы нормализоваться.

Я прошел черезpandas.io.json.json_normalize документация, поскольку она делает именно то, что я хочу.

Я смог нормализовать часть этого и теперь понимаю, как работают словари, но я все еще не там.

С кодом ниже я могу получить только первый уровень.

import json
import pandas as pd
from pandas.io.json import json_normalize

with open('authors_sample.json') as f:
    d = json.load(f)

raw = json_normalize(d['hits']['hits'])

authors = json_normalize(data = d['hits']['hits'], 
                         record_path = '_source', 
                         meta = ['_id', ['_source', 'journal'], ['_source', 'title'], 
                                 ['_source', 'normalized_venue_name']
                                 ])

Я пытаюсь "копаться" в словарь "авторов" с кодом ниже, ноrecord_path = ['_source', 'authors'] бросает меняTypeError: string indices must be integers, Насколько я понимаюjson_normalize логика должна быть хорошей, но я все еще не совсем понимаю, как погрузиться в JSON сdict противlist.

Я даже прошел через это простоепример.

authors = json_normalize(data = d['hits']['hits'], 
                         record_path = ['_source', 'authors'], 
                         meta = ['_id', ['_source', 'journal'], ['_source', 'title'], 
                                 ['_source', 'normalized_venue_name']
                                 ])

Ниже приведен фрагмент файла JSON (5 записей).

{u'_shards': {u'failed': 0, u'successful': 5, u'total': 5},
 u'hits': {u'hits': [{u'_id': u'7CB3F2AD',
    u'_index': u'scibase_listings',
    u'_score': 1.0,
    u'_source': {u'authors': None,
     u'deleted': 0,
     u'description': None,
     u'doi': u'',
     u'is_valid': 1,
     u'issue': None,
     u'journal': u'Physical Review Letters',
     u'link': None,
     u'meta_description': None,
     u'meta_keywords': None,
     u'normalized_venue_name': u'phys rev lett',
     u'pages': None,
     u'parent_keywords': [u'Chromatography',
      u'Quantum mechanics',
      u'Particle physics',
      u'Quantum field theory',
      u'Analytical chemistry',
      u'Quantum chromodynamics',
      u'Physics',
      u'Mass spectrometry',
      u'Chemistry'],
     u'pub_date': u'1987-03-02 00:00:00',
     u'pubtype': None,
     u'rating_avg_weighted': 0,
     u'rating_clarity': 0.0,
     u'rating_clarity_weighted': 0.0,
     u'rating_innovation': 0.0,
     u'rating_innovation_weighted': 0.0,
     u'rating_num_weighted': 0,
     u'rating_reproducability': 0,
     u'rating_reproducibility_weighted': 0.0,
     u'rating_versatility': 0.0,
     u'rating_versatility_weighted': 0.0,
     u'review_count': 0,
     u'tag': [u'mass spectra', u'elementary particles', u'bound states'],
     u'title': u'Evidence for a new meson: A quasinuclear NN-bar bound state',
     u'userAvg': 0.0,
     u'user_id': None,
     u'venue_name': u'Physical Review Letters',
     u'views_count': 0,
     u'volume': None},
    u'_type': u'listing'},
   {u'_id': u'7AF8EBC3',
    u'_index': u'scibase_listings',
    u'_score': 1.0,
    u'_source': {u'authors': [{u'affiliations': [u'Punjabi University'],
       u'author_id': u'780E3459',
       u'author_name': u'munish puri'},
      {u'affiliations': [u'Punjabi University'],
       u'author_id': u'48D92C79',
       u'author_name': u'rajesh dhaliwal'},
      {u'affiliations': [u'Punjabi University'],
       u'author_id': u'7D9BD37C',
       u'author_name': u'r s singh'}],
     u'deleted': 0,
     u'description': None,
     u'doi': u'',
     u'is_valid': 1,
     u'issue': None,
     u'journal': u'Journal of Industrial Microbiology & Biotechnology',
     u'link': None,
     u'meta_description': None,
     u'meta_keywords': None,
     u'normalized_venue_name': u'j ind microbiol biotechnol',
     u'pages': None,
     u'parent_keywords': [u'Nuclear medicine',
      u'Psychology',
      u'Hydrology',
      u'Chromatography',
      u'X-ray crystallography',
      u'Nuclear fusion',
      u'Medicine',
      u'Fluid dynamics',
      u'Thermodynamics',
      u'Physics',
      u'Gas chromatography',
      u'Radiobiology',
      u'Engineering',
      u'Organic chemistry',
      u'High-performance liquid chromatography',
      u'Chemistry',
      u'Organic synthesis',
      u'Psychotherapist'],
     u'pub_date': u'2008-04-04 00:00:00',
     u'pubtype': None,
     u'rating_avg_weighted': 0,
     u'rating_clarity': 0.0,
     u'rating_clarity_weighted': 0.0,
     u'rating_innovation': 0.0,
     u'rating_innovation_weighted': 0.0,
     u'rating_num_weighted': 0,
     u'rating_reproducability': 0,
     u'rating_reproducibility_weighted': 0.0,
     u'rating_versatility': 0.0,
     u'rating_versatility_weighted': 0.0,
     u'review_count': 0,
     u'tag': [u'flow rate',
      u'operant conditioning',
      u'packed bed reactor',
      u'immobilized enzyme',
      u'specific activity'],
     u'title': u'Development of a stable continuous flow immobilized enzyme reactor for the hydrolysis of inulin',
     u'userAvg': 0.0,
     u'user_id': None,
     u'venue_name': u'Journal of Industrial Microbiology & Biotechnology',
     u'views_count': 0,
     u'volume': None},
    u'_type': u'listing'},
   {u'_id': u'7521A721',
    u'_index': u'scibase_listings',
    u'_score': 1.0,
    u'_source': {u'authors': [{u'author_id': u'7FF872BC',
       u'author_name': u'barbara eileen ryan'}],
     u'deleted': 0,
     u'description': None,
     u'doi': u'',
     u'is_valid': 1,
     u'issue': None,
     u'journal': u'The American Historical Review',
     u'link': None,
     u'meta_description': None,
     u'meta_keywords': None,
     u'normalized_venue_name': u'american historical review',
     u'pages': None,
     u'parent_keywords': [u'Social science',
      u'Politics',
      u'Sociology',
      u'Law'],
     u'pub_date': u'1992-01-01 00:00:00',
     u'pubtype': None,
     u'rating_avg_weighted': 0,
     u'rating_clarity': 0.0,
     u'rating_clarity_weighted': 0.0,
     u'rating_innovation': 0.0,
     u'rating_innovation_weighted': 0.0,
     u'rating_num_weighted': 0,
     u'rating_reproducability': 0,
     u'rating_reproducibility_weighted': 0.0,
     u'rating_versatility': 0.0,
     u'rating_versatility_weighted': 0.0,
     u'review_count': 0,
     u'tag': [u'social movements'],
     u'title': u"Feminism and the women's movement : dynamics of change in social movement ideology, and activism",
     u'userAvg': 0.0,
     u'user_id': None,
     u'venue_name': u'The American Historical Review',
     u'views_count': 0,
     u'volume': None},
    u'_type': u'listing'},
   {u'_id': u'7DAEB9A4',
    u'_index': u'scibase_listings',
    u'_score': 1.0,
    u'_source': {u'authors': [{u'author_id': u'0299B8E9',
       u'author_name': u'fraser j harbutt'}],
     u'deleted': 0,
     u'description': None,
     u'doi': u'',
     u'is_valid': 1,
     u'issue': None,
     u'journal': u'The American Historical Review',
     u'link': None,
     u'meta_description': None,
     u'meta_keywords': None,
     u'normalized_venue_name': u'american historical review',
     u'pages': None,
     u'parent_keywords': [u'Superconductivity',
      u'Nuclear fusion',
      u'Geology',
      u'Chemistry',
      u'Metallurgy'],
     u'pub_date': u'1988-01-01 00:00:00',
     u'pubtype': None,
     u'rating_avg_weighted': 0,
     u'rating_clarity': 0.0,
     u'rating_clarity_weighted': 0.0,
     u'rating_innovation': 0.0,
     u'rating_innovation_weighted': 0.0,
     u'rating_num_weighted': 0,
     u'rating_reproducability': 0,
     u'rating_reproducibility_weighted': 0.0,
     u'rating_versatility': 0.0,
     u'rating_versatility_weighted': 0.0,
     u'review_count': 0,
     u'tag': [u'iron'],
     u'title': u'The iron curtain : Churchill, America, and the origins of the Cold War',
     u'userAvg': 0.0,
     u'user_id': None,
     u'venue_name': u'The American Historical Review',
     u'views_count': 0,
     u'volume': None},
    u'_type': u'listing'},
   {u'_id': u'7B3236C5',
    u'_index': u'scibase_listings',
    u'_score': 1.0,
    u'_source': {u'authors': [{u'author_id': u'7DAB7B72',
       u'author_name': u'richard m freeland'}],
     u'deleted': 0,
     u'description': None,
     u'doi': u'',
     u'is_valid': 1,
     u'issue': None,
     u'journal': u'The American Historical Review',
     u'link': None,
     u'meta_description': None,
     u'meta_keywords': None,
     u'normalized_venue_name': u'american historical review',
     u'pages': None,
     u'parent_keywords': [u'Political Science', u'Economics'],
     u'pub_date': u'1985-01-01 00:00:00',
     u'pubtype': None,
     u'rating_avg_weighted': 0,
     u'rating_clarity': 0.0,
     u'rating_clarity_weighted': 0.0,
     u'rating_innovation': 0.0,
     u'rating_innovation_weighted': 0.0,
     u'rating_num_weighted': 0,
     u'rating_reproducability': 0,
     u'rating_reproducibility_weighted': 0.0,
     u'rating_versatility': 0.0,
     u'rating_versatility_weighted': 0.0,
     u'review_count': 0,
     u'tag': [u'foreign policy'],
     u'title': u'The Truman Doctrine and the origins of McCarthyism : foreign policy, domestic politics, and internal security, 1946-1948',
     u'userAvg': 0.0,
     u'user_id': None,
     u'venue_name': u'The American Historical Review',
     u'views_count': 0,
     u'volume': None},
    u'_type': u'listing'}],
  u'max_score': 1.0,
  u'total': 36429433},
 u'timed_out': False,
 u'took': 170}

Ответы на вопрос(2)

Ваш ответ на вопрос