Commit 35d65ae2 authored by Hugo Buddelmeijer's avatar Hugo Buddelmeijer
Browse files

+ Experimental SourceCollection class to access IVOA TAP resources as if

  they are any other SourceCollection.
parent 06678228
# -*- coding: utf-8 -*-
__version__ = '@(#)$Revision$'
from common.database.Database import database
from common.database.DBMain import DBObject, persistent
from common.log.Message import Message
from common.log.Error import Error
import datetime, os
import urllib
from SourceCollection import SourceCollection
from astro.util.SetRelations import SetRelations, SetRelationsSet
from astro.util.TableConverter import TableConverter
"""
This file contains the experimental ExternalTAP class, a SourceCollection to
access IVOA TAP Resources as if they are local SourceCollections.
# Below is a workaround to get a fake persistent class.
# Import it as:
from astro.main.sourcecollection.ExternalTAP import External as ExternalTAP
# Correct way:
class ExternalTAP(SourceCollection):
resource = persistent('IVO URI of Resource [None]', str, '')
# Example: "ivo://wfau.roe.ac.uk/sdssdr7-dsa"
access_url = persistent('HTTP URI of Resource [None]', str, '')
# Example: "http://wfaudata.roe.ac.uk/sdssdr7-dsa/TAP/sync"
execution_mode = persistent('Execution mode', str, 'sync')
# 'sync' or 'async', should match the access_url
# Only 'sync' supported.
language = persistent('TAP Language to use [None]', str, 'ADQL')
# Only 'ADQL' is supported at the moment.
query_template = persistent('Template to use for query', str, '')
# A string that can be modified by other operators.
# The final table to select the sources/attributes from should be aliased to 't'.
# Example "SELECT %(select_clause)s FROM SpecObjAll AS t %(where_clause)s"
attribute_tapcolumns = persistent('Column names of the attributes in the TAP resource.', str, [])
# Should be in the same order as attribute_columns and attribute_names
row_identifiers = persistent('List of columns whose combination uniquely identifies a row.', str, [])
# Can be used to check which rows have not yet been fetched or stored
# in the database.
"""
from External import External as ExternalBase
class External(ExternalBase):
    """
    The experimental ExternalTAP is a SourceCollection to access IVOA TAP
    Resources as if they are local SourceCollections.

    The ExternalTAP points to an IVOA TAP resource from which it can fetch
    data. It has a local sourcelist_data to store this data, which can be
    shared with SCs further in the tree.

    E.g. An ExternalTAP can point to the PhotoObjAll table of an SDSS resource,
    but not store any catalog data initially. A FilterSources can be created
    with this ExternalTAP as parent, sharing the sourcelist_data. Once the
    catalog data of the FS is retrieved, it can be stored in the same
    sourcelist_data.

    TAP functionality adds another 'layer' to evaluate operators on (next to
    the Oracle SQL and the Python layers). To facilitate this, the following
    functions are created:
    - get_tapurl_dict(): Creates a dictionary from which the TAP request can
      be generated. Similar to get_query_with_clauses and get_query_self.
      This dictionary can be modified by the children of an SC.
      Implemented in ExternalTAP, FilterSources.
    - get_tapurl(): Creates the actual URL from the tapurl_dict to request the
      catalog data.
      Implement in SourceCollection.py
    - load_data_tap(): Retrieves the catalog over TAP. Works only for trees
      that contain SCs that support get_tapurl_dict().
      Implement in SourceCollection.py

    TODO [TAP]:
    - Improve the attribute handling. Necessary to support other SCs,
      e.g. RenameAttributes, SelectAttributes.
    - Implement get_tapurl_dict() in other SourceCollections.
    - Properly implement row_identifiers globally in the SCs. Required for
      storing data. And also for possible future source extraction operators.
    - Create true persistent class.
    - Implement storing of catalog data. This is not yet possible because the
      stored sources have a SID, but the newly fetched do not yet.
    - Integrate TAP support with the SourceCollectionTree:
      - Load data over TAP through load_data()
      - Include optimization rules for TAP queries. (E.g. do not convert
        FilterSources into SelectSources etc.)
    - Determine whether we can use WITH clauses in ADQL queries.
    - Implement 'async' execution mode.
    - Implement ConeSearch?
    """

    # Plain class attributes standing in for the not-yet-persistent
    # properties; per-instance values are assigned in __init__ or by the user.
    resource = None
    access_url = None
    execution_mode = None
    language = None
    query_template = None
    attribute_tapcolumns = None
    row_identifiers = None

    def __init__(self):
        """
        Initialise the base class and give the TAP settings usable defaults.
        """
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(External, self).__init__()
        self.execution_mode = 'sync'
        self.language = 'ADQL'
        # Fresh lists per instance, so instances never share mutable state
        # and the list-valued settings are never None.
        self.attribute_tapcolumns = []
        self.row_identifiers = []

    def get_attributes_full(self, cache=False):
        """
        Use the resource to get information about the attributes.

        A one-row query is sent to the TAP resource and the returned VOTable
        is used to look up the format, ucd, null value and length of every
        column listed in attribute_tapcolumns.

        cache: If True, return previously cached attributes when available
               and cache the freshly determined ones.

        Returns a list with one dictionary per attribute (keys 'name',
        'column', 'tapcolumn', 'format', 'ucd', 'null' and 'length'), or
        False when no TAP dictionary is available.

        The query, URL, raw VOTable and TableConverter of the last request
        are kept in the debug_attributesfull_* attributes for inspection.

        TODO MT M [TAP]:
        - Automatically infer attribute_tapcolumns, attribute_names and
          attribute_columns from the resource. E.g. to create sourcelist_data
          automatically.
        """
        # Return the cache if possible.
        if self.cache_attributes_self and cache:
            return self.cache_attributes_self

        tapdict = self.get_tapurl_dict()
        if not tapdict:
            return False

        # Only one row is needed, the column metadata is all we are after.
        select_clause = ' TOP 1 ' + ', '.join(
            't.' + tapcolumn for tapcolumn in tapdict['attributes'])
        query = tapdict['query_template'] % {
            'select_clause': select_clause,
            'where_clause': '',
        }
        trurl = "%(access_url)s?REQUEST=doQuery&LANG=%(language)s&QUERY=%(query)s" % {
            'access_url': tapdict['access_url'],
            'language': tapdict['language'],
            'query': urllib.quote(query),
        }
        # Stored before the request, so a failing request can be inspected.
        self.debug_attributesfull_query = query
        self.debug_attributesfull_trurl = trurl

        # Always release the connection, also when reading fails.
        tu = urllib.urlopen(trurl)
        try:
            tapvot = tu.read()
        finally:
            tu.close()
        self.debug_attributesfull_tapvot = tapvot

        tc = TableConverter()
        tc.load_votable_string(tapvot)
        self.debug_attributesfull_tc = tc

        attributes = []
        for tapcol, col, name in zip(self.attribute_tapcolumns,
                                     self.attribute_columns,
                                     self.attribute_names):
            metadata = tc.attributes[tapcol]
            attributes.append({
                'name': name,
                'column': col,
                'tapcolumn': tapcol,
                'format': metadata['format'],
                'ucd': metadata['ucd'],
                'null': metadata['null'],
                'length': metadata['length'],
                # 'original', 'from', 'fromalias'
            })
        self.debug_attributes = attributes

        # Cache the data if requested or if data was cached earlier to ensure
        # it is up to date.
        if cache or self.cache_attributes_self:
            self.cache_attributes_self = attributes
        return attributes

    def get_tapurl_dict(self):
        """
        Create the dictionary from which the TAP request can be generated.

        The list values are copies, because the dictionary may be modified
        by the children of this SourceCollection and such a modification
        should not alter the settings of this instance.
        """
        def copied(values):
            # Unset (None) values are passed on unchanged.
            if values is None:
                return None
            return list(values)

        return {
            'access_url': self.access_url,
            'query_template': self.query_template,
            'attributes': copied(self.attribute_tapcolumns),
            'attribute_names': copied(self.attribute_names),
            'language': self.language,
            'execution_mode': self.execution_mode,
            'resource': self.resource,
            'and_clauses': [],
            'row_identifiers': copied(self.row_identifiers),
        }

    def create_empty_datasuper(self):
        """
        Overloaded to set the .row_identifiers of the created TC.

        The TAP column names in self.row_identifiers are translated to the
        corresponding attribute names. Raises a KeyError when a row
        identifier is not one of the attributes.

        Not working properly yet.
        TODO MT H [NOCAT]: Design a generic way to do this.
        """
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(External, self).create_empty_datasuper()

        # get_attributes_full() returns False without a TAP dictionary and
        # row_identifiers may be unset; treat both as empty.
        attrs = self.get_attributes_full(cache=True) or []
        row_identifiers = self.row_identifiers or []

        attr_by_tapcolumn = dict((attr['tapcolumn'], attr) for attr in attrs)
        self.datasuper.row_identifiers = [
            attr_by_tapcolumn[tapcolumn]['name']
            for tapcolumn in row_identifiers
        ]
        return self.datasuper
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment