# Source code for wikidata2df.wikidata2df
"""Main module."""
from collections import defaultdict
from functools import lru_cache
from itertools import product, chain
import pandas as pd
import requests
def parse_query_results(query_result):
    """Parse a Wikidata SPARQL JSON response into a DataFrame.

    Every variable that appears in at least one binding becomes a column,
    in the order the variables are first encountered. Rows that lack a
    variable get ``None`` in that column. The Wikidata entity URI prefix
    is stripped from every value.

    Args:
        query_result (dict): The decoded JSON response; the rows are read
            from ``query_result["results"]["bindings"]``.

    Returns:
        A Pandas DataFrame with one row per binding and one column per
        variable.

    Raises:
        KeyError: If ``query_result`` has no ``"results"``/``"bindings"``
            entries.
    """
    bindings = query_result["results"]["bindings"]

    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # column order is the same on every run (a set would make it depend on
    # string hash randomization).
    columns = dict.fromkeys(chain.from_iterable(bindings))

    parsed_results = {}
    for column in columns:
        values = []
        for row in bindings:
            try:
                values.append(row[column]["value"])
            except KeyError:
                # If there is no data for a key, append None
                values.append(None)
        parsed_results[column] = values

    results_df = pd.DataFrame.from_dict(parsed_results).replace(
        {"http://www.wikidata.org/entity/": ""}, regex=True
    )
    return results_df
@lru_cache(maxsize=10)
def wikidata2df(query):
    """Run a SPARQL query and return its results as a Pandas DataFrame.

    Convenience wrapper: the query is handed to ``perform_query`` and the
    response it returns is converted with ``parse_query_results``. Optional
    fields that were not found for a row appear as ``None`` in the result.

    Results are memoized with ``lru_cache`` for the 10 most recently used
    query strings, so repeating a query returns the same DataFrame object
    without performing the request again.

    NOTE(review): earlier documentation states that an exception is raised
    when no result is found; that would have to come from ``perform_query``,
    which is not shown here -- confirm.

    Args:
        query(str): A string containing a valid SPARQL query.

    Returns:
        A Pandas DataFrame with the results of the query.
    """
    return parse_query_results(perform_query(query))