Source code for app.parser
import model.models as model
import datetime
try:
import xml.etree.cElementTree as Et
except ImportError:
import xml.etree.ElementTree as Et
class Parser(object):
    """Builds ``model`` objects out of an XML import document.

    The document is parsed once, on construction. Every ``get_*`` method then
    reads from the parsed tree and returns the corresponding ``model``
    object(s). Methods that look up a mandatory element raise
    ``AttributeError`` when that element is absent from the document.
    """

    # Language codes for which every indicator element carries the children
    # ``ind_name_<lang>`` and ``ind_description_<lang>``.
    _TRANSLATION_LANGUAGES = ('en', 'es', 'fr')

    # Spellings (compared lower-cased and stripped) read as boolean False.
    _FALSE_LITERALS = frozenset(('false', '0', 'no', 'n', 'f', 'off'))

    def __init__(self, content):
        """
        :param content: the XML document, in any form accepted by
            ``ElementTree.fromstring``.
        """
        self._content = content
        self._root = Et.fromstring(content)
        self._observations_slices = self._get_observations_slices()

    def _get_observations_slices(self):
        """Creates an inverse index of observations and slices.

        :returns: map in which the key is the observation ID and the
            value is its corresponding slice ID. If an observation is
            referred by several slices, the last slice in document order wins.
        """
        return {
            obs.get("id"): sli.get("id")
            for sli in self._root.findall(".//slice")
            for obs in sli.findall("./referred/observation-ref")
        }

    def get_simple_indicators(self):
        """
        :returns: generator of the simple indicators found in the XML file.
        """
        ind_root = self._root.find("indicators")
        return (self._get_simple_indicator(ind) for ind
                in ind_root.findall("indicator"))

    def get_compound_indicators(self):
        """
        :returns: generator of the compound indicators found in the XML file.
        """
        ind_root = self._root.find("indicators")
        return (self._get_compound_indicator(ind) for ind
                in ind_root.findall("compound_indicator"))

    def get_indicator_groups(self):
        """
        :returns: list of indicator groups found in the XML file.
        """
        groups_root = self._root.find('indicator_groups')
        return [self._get_indicator_group(group)
                for group in groups_root.findall('indicator_group')]

    def get_user(self):
        """
        :returns: user data found in the XML file (without user_ip). The
            timestamp is the naive UTC time at which this method is called.
        """
        username = self._root.find('import_process').find('user').text
        return model.User(id=username, timestamp=datetime.datetime.utcnow())

    def get_organization(self):
        """
        :returns: organization data found in the XML file. The organization ID
            is constructed with the domain name in the organization_url, for
            example www.observatoire-foncier.com will result in the ID
            'observatoire-foncier'.
        :raises ValueError: if organization_url is empty or has no host.
        """
        process = self._root.find('import_process')
        org_name = process.find('organization_name').text
        org_url = process.find('organization_url').text
        org_id = self._organization_id_from_url(org_url)
        organization = model.Organization(name=org_name, id=org_id)
        organization.url = org_url
        return organization

    def get_datasource(self):
        """
        :returns: datasource data found in the XML file.
        """
        dsource = self._root.find('import_process').find('datasource')
        return model.DataSource(name=dsource.text,
                                dsource_id=dsource.get("id"))

    def get_dataset(self):
        """
        :returns: dataset data found in the XML file, including its license.
        """
        dataset = model.Dataset()
        dataset.id = self._root.get('id')
        dataset.sdmx_frequency = \
            self._root.find('import_process').find('sdmx_frequency').text
        dataset.license = self.get_license()
        return dataset

    def get_license(self):
        """
        :returns: license data of the XML file. The ``republish`` element is
            read as a textual boolean: empty text and the spellings in
            ``_FALSE_LITERALS`` (e.g. "false", "0", "no") give False, any
            other text gives True.
        """
        lic = self._root.find('license')
        return model.License(
            name=lic.find('lic_name').text,
            description=lic.find('lic_description').text,
            # bool() on the raw text would be True for "false" as well.
            republish=self._parse_bool(lic.find('republish').text),
            url=lic.find('lic_url').text)

    def _fill_indicator(self, indicator, ind):
        """Copies onto ``indicator`` the data shared by simple and compound
        indicators: measurement unit, topic, preferable tendency and one
        translation per language in ``_TRANSLATION_LANGUAGES``.

        :param indicator: model object being populated.
        :param ind: XML element describing the indicator.
        :returns: the same ``indicator`` object.
        """
        indicator.measurement_unit = \
            self._parse_measurement_unit(ind.find("measure_unit"))
        indicator.topic_id = ind.find('topic-ref').text
        indicator.preferable_tendency = ind.find('preferable_tendency').text
        for lang in self._TRANSLATION_LANGUAGES:
            indicator.add_translation(model.IndicatorTranslation(
                lang_code=lang,
                name=ind.find('ind_name_' + lang).text,
                description=ind.find('ind_description_' + lang).text))
        return indicator

    def _get_simple_indicator(self, ind):
        """Builds a ``model.Indicator`` from an ``indicator`` element."""
        indicator = self._fill_indicator(
            model.Indicator(id=ind.get('id')), ind)
        # The indicator may be related with others.
        # The attribute related_id WILL NOT be persisted to the database and
        # it is only used to create the relationships objects in the services
        splits_in = ind.find('splitsIn')
        if splits_in is not None:
            indicator.related_id = [rel.get('id') for rel
                                    in splits_in.findall('indicator-ref')]
        else:
            indicator.related_id = []
        return indicator

    def _get_compound_indicator(self, ind):
        """Builds a ``model.CompoundIndicator`` from a ``compound_indicator``
        element."""
        indicator = self._fill_indicator(
            model.CompoundIndicator(id=ind.get('id')), ind)
        # The indicator may be related with others.
        # The attribute related_id WILL NOT be persisted to the database and
        # it is only used to create the relationships objects in the services
        indicator.related_id = [rel.get('id') for rel
                                in ind.findall('indicator-ref')]
        return indicator

    def _get_indicator_group(self, group):
        """Builds a ``model.IndicatorGroup`` from an ``indicator_group``
        element."""
        indicator_group = model.IndicatorGroup()
        indicator_group.id = group.get('id')
        # The indicator group is linked to a CompoundIndicator. The attribute
        # indicator-ref will be used in the services layer to link the
        # IndicatorGroup and the CompoundIndicator, and it will not be
        # persisted to the database
        indicator_group.indicator_ref = group.get('indicator-ref')
        return indicator_group

    def get_observations(self):
        """
        :returns: generator of the observations found in the XML file. Every
            observation comes with its ref_time, value and computation. Each
            observation has the fields 'region_code' (UN_code) and
            'country_code' (ISO3) of the region they refer to.
        """
        obs_root = self._root.find("observations")
        return (self._get_observation(obs)
                for obs in obs_root.findall("observation"))

    def get_slices(self):
        """
        :returns: generator of the slices found in the XML file. Each slice
            has the fields 'region_code' (UN_code) and 'country_code' (ISO3)
            of the region they refer to.
        """
        sli_root = self._root.find('slices')
        return (self._get_slice(sli) for sli in sli_root.findall('slice'))

    def _get_observation(self, obs):
        """Builds a ``model.Observation`` from an ``observation`` element."""
        observation = model.Observation()
        observation.id = obs.get('id')
        observation.indicator_id = obs.find('indicator-ref').get('indicator')
        observation.ref_time = self._parse_time(obs.find('time'))
        observation.issued = self._parse_issued(obs.find('issued'))
        observation.value = self._parse_obs_value(obs.find('obs-status'),
                                                  obs.find('value'))
        observation.computation = \
            self._parse_computation(obs.find('computation'))
        observation.indicator_group_id = obs.get('group')
        # None when no slice refers to this observation.
        observation.slice_id = self._observations_slices.get(observation.id)
        # Same attribute get_dataset() stores in Dataset.id; reading it
        # directly avoids rebuilding the Dataset and its License for every
        # observation.
        observation.dataset_id = self._root.get('id')
        # An observation may refer to a country or to a whole region.
        # These fields will not be persisted to the database, instead they
        # will be used to link the observation with its referred region or
        # country in the services layer.
        observation.region_code = self._child_text(obs, "region")
        observation.country_code = self._child_text(obs, "country")
        return observation

    def _get_slice(self, sli):
        """Builds a ``model.Slice`` from a ``slice`` element."""
        metadata = sli.find('sli_metadata')
        new_slice = model.Slice(id=sli.get('id'))
        new_slice.indicator_id = metadata.find('indicator-ref').get('id')
        # Same attribute get_dataset() stores in Dataset.id; reading it
        # directly avoids rebuilding the Dataset and its License per slice.
        new_slice.dataset_id = self._root.get('id')
        # The slice's dimension may be a Region or a Time. If it is a Time we
        # can create it here and link it with the slice. If it is a Region
        # we must check the API using its region code or iso3 code.
        time_node = metadata.find("time")
        new_slice.dimension = \
            self._parse_time(time_node) if time_node is not None else None
        # Those fields will not be persisted to the database, instead they
        # will be used to link the slice with its regions from the helpers
        # layer.
        new_slice.region_code = self._child_text(metadata, "region")
        new_slice.country_code = self._child_text(metadata, "country")
        return new_slice

    @staticmethod
    def _child_text(node, tag):
        """
        :returns: text of the first ``tag`` child of ``node``, or None when
            ``node`` has no such child.
        """
        child = node.find(tag)
        # Compare with None explicitly: an Element without children is falsy.
        return child.text if child is not None else None

    @classmethod
    def _parse_bool(cls, text):
        """Reads an element text as a boolean.

        :param text: element text, possibly None.
        :returns: False for None, blank text and the spellings listed in
            ``_FALSE_LITERALS`` (case-insensitive); True for any other text.
        """
        if text is None:
            return False
        normalized = text.strip().lower()
        if not normalized:
            return False
        return normalized not in cls._FALSE_LITERALS

    @staticmethod
    def _organization_id_from_url(url):
        """Derives the organization ID from its URL.

        The scheme, user info, port, path, query and fragment are discarded
        and the remaining host is split on dots. Hosts with three or more
        labels yield the second label ('www.observatoire-foncier.com' ->
        'observatoire-foncier'); shorter hosts yield the first label
        ('observatoire-foncier.com' -> 'observatoire-foncier').

        :param url: organization URL, with or without scheme.
        :returns: the label used as organization ID.
        :raises ValueError: if ``url`` is empty or contains no host.
        """
        if not url or not url.strip():
            raise ValueError("organization_url is empty")
        host = url.strip()
        if "://" in host:
            host = host.split("://", 1)[1]
        for separator in ("/", "?", "#"):
            host = host.split(separator, 1)[0]
        # Drop "user:password@" and ":port" if present.
        host = host.rsplit("@", 1)[-1].split(":", 1)[0]
        labels = [label for label in host.split(".") if label]
        if not labels:
            raise ValueError("organization_url has no host: %r" % (url,))
        if len(labels) >= 3:
            return labels[1]
        return labels[0]

    @staticmethod
    def _parse_time(node):
        """Builds the time model object described by a ``time`` element.

        :param node: element whose ``unit`` attribute is "years" or "months".
        :returns: ``model.Interval`` for a years element with an ``interval``
            child, ``model.YearInterval`` for a plain years element,
            ``model.MonthInterval`` for a months element, and None for any
            other unit.
        """
        unit = node.get("unit")
        if unit == "years":
            interval = node.find("interval")
            if interval is None:
                return model.YearInterval(int(node.text))
            start_year = int(interval.find("beginning").text)
            end_year = int(interval.find("end").text)
            beginning = datetime.date(year=start_year, month=1, day=1)
            # The end date is the first day of the year following end_year.
            end = datetime.date(year=end_year + 1, month=1, day=1)
            return model.Interval(beginning, end)
        if unit == "months":
            # The information comes in the format MM/YYYY
            parts = node.text.split("/")
            month = int(parts[0])
            year = int(parts[1])
            return model.MonthInterval(month, year)
        return None

    @staticmethod
    def _parse_issued(node):
        """Builds a ``model.Instant`` from an ``issued`` element whose text
        has the format YYYY-MM-DDTHH:MM:SS."""
        date = datetime.datetime.strptime(node.text, '%Y-%m-%dT%H:%M:%S')
        return model.Instant(instant=date)

    @staticmethod
    def _parse_obs_value(status_node, value_node):
        """Builds a ``model.Value`` from the ``obs-status`` element and the
        optional ``value`` element (``value`` is only set when the element
        exists)."""
        value = model.Value(obs_status=status_node.text)
        if value_node is not None:
            value.value = value_node.text
        return value

    @staticmethod
    def _parse_computation(node):
        """Builds a ``model.Computation`` whose description is the element
        text and whose uri is the element's ``type`` attribute."""
        return model.Computation(description=node.text, uri=node.get("type"))

    @staticmethod
    def _parse_measurement_unit(node):
        """Builds a ``model.MeasurementUnit`` from a ``measure_unit`` element
        and its ``convertible_to`` and ``factor`` attributes."""
        return model.MeasurementUnit(
            name=node.text,
            convertible_to=node.get("convertible_to"),
            factor=node.get("factor")
        )