Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Hugo Buddelmeijer
sourcecollection-dev
Commits
35d65ae2
Commit
35d65ae2
authored
Feb 03, 2011
by
Hugo Buddelmeijer
Browse files
+ Experimental SourceCollection class to access IVOA TAP resources as if
they are any other SourceCollection.
parent
06678228
Changes
1
Hide whitespace changes
Inline
Side-by-side
ExternalTAP/ExternalTAP.py
0 → 100644
View file @
35d65ae2
# -*- coding: utf-8 -*-
# Module version string. '$Revision$' looks like a version-control keyword
# placeholder that is expanded on checkout -- TODO confirm which VCS expands it.
__version__ = '@(#)$Revision$'
from
common.database.Database
import
database
from
common.database.DBMain
import
DBObject
,
persistent
from
common.log.Message
import
Message
from
common.log.Error
import
Error
import
datetime
,
os
import
urllib
from
SourceCollection
import
SourceCollection
from
astro.util.SetRelations
import
SetRelations
,
SetRelationsSet
from
astro.util.TableConverter
import
TableConverter
"""
This file contains the experimental ExternalTAP class, a SourceCollection to
access IVOA TAP Resources as if they are local SourceCollections.
# Below is a workaround to get a fake persistent class.
# Import it as:
from astro.main.sourcecollection.ExternalTAP import External as ExternalTAP
# Correct way:
class ExternalTAP(SourceCollection):
resource = persistent('IVO URI of Resource [None]', str, '')
# Example: "ivo://wfau.roe.ac.uk/sdssdr7-dsa"
access_url = persistent('HTTP URI of Resource [None]', str, '')
# Example: "http://wfaudata.roe.ac.uk/sdssdr7-dsa/TAP/sync"
execution_mode = persistent('Execution mode', str, 'sync', '')
# 'sync' or 'async', should match the access_url
# Only 'sync' supported.
language = persistent('TAP Language to use [None]', str, 'ADQL')
# Only 'ADQL' is supported at the moment.
query_template = persistent('Template to use for query', str, '')
# A string that can be modified by other operators.
# The final table to select the sources/attributes from should be aliased to 't'.
# Example "SELECT %(select_clause)s FROM SpecObjAll AS t %(where_clause)s"
attribute_tapcolumns = persistent('Column names of the attributes in the TAP resource.', str, [])
# Should be in the same order as attribute_columns and attribute_names
row_identifiers = persistent('List of columns whose combination uniquely identifies a row.', str, [])
# Can be used to check which rows have not yet been fetched or stored
# in the database.
"""
from
External
import
External
as
ExternalBase
class External(ExternalBase):
    """
    Experimental SourceCollection that accesses an IVOA TAP resource as if it
    were a local SourceCollection.

    The ExternalTAP points to an IVOA TAP resource from which it can fetch
    data. It has a local sourcelist_data to store this data, which can be
    shared with SCs further in the tree.

    E.g. an ExternalTAP can point to the PhotoObjAll table of an SDSS
    resource, but not store any catalog data initially. A FilterSources can
    be created with this ExternalTAP as parent, sharing the sourcelist_data.
    Once the catalog data of the FS is retrieved, it can be stored in the
    same sourcelist_data.

    TAP functionality adds another 'layer' to evaluate operators on (next to
    the Oracle SQL and the Python layers). To facilitate this, the following
    functions are created:
     - get_tapurl_dict(): Creates a dictionary from which the TAP request can
       be generated. Similar to get_query_with_clauses and get_query_self.
       This dictionary can be modified by the children of an SC.
       Implemented in ExternalTAP, FilterSources.
     - get_tapurl(): Creates the actual URL from the tapurl_dict to request
       the catalog data.
       Implemented in SourceCollection.py
     - load_data_tap(): Retrieves the catalog over TAP. Works only for trees
       that contain SCs that support get_tapurl_dict().
       Implemented in SourceCollection.py

    NOTE: this module targets Python 2 (urllib.quote / urllib.urlopen).

    TODO [TAP]:
     - Improve the attribute handling. Necessary to support other SCs,
       e.g. RenameAttributes, SelectAttributes.
     - Implement get_tapurl_dict() in other SourceCollections.
     - Properly implement row_identifiers globally in the SCs. Required for
       storing data. And also for possible future source extraction
       operators.
     - Create true persistent class.
     - Implement storing of catalog data. This is not yet possible because
       the stored sources have a SID, but the newly fetched do not yet.
     - Integrate TAP support with the SourceCollectionTree:
       - Load data over TAP through load_data()
       - Include optimization rules for TAP queries. (E.g. do not convert
         FilterSources into SelectSources etc.)
     - Determine whether we can use WITH clauses in ADQL queries.
     - Implement 'async' execution mode.
     - Implement ConeSearch?
    """

    # Plain class attributes standing in for persistent properties (see the
    # "fake persistent class" workaround described at module level).
    resource = None              # IVO URI of the resource
    access_url = None            # HTTP URI of the TAP endpoint
    execution_mode = None        # set to 'sync' in __init__
    language = None              # set to 'ADQL' in __init__
    query_template = None        # %-template with select_clause/where_clause
    attribute_tapcolumns = None  # TAP column names, set to [] in __init__
    row_identifiers = None       # TAP columns that together identify a row

    def __init__(self):
        """Initialise the base class and set the TAP defaults."""
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(External, self).__init__()
        self.execution_mode = 'sync'
        self.language = 'ADQL'
        self.attribute_tapcolumns = []

    def get_attributes_full(self, cache=False):
        """
        Use the resource to get information about the attributes.

        Requests a single row ('TOP 1') of all TAP columns from the resource
        and reads format, ucd, null and length of every attribute from the
        returned VOTable.

        Parameters:
         - cache: when True, return the cached attributes if there are any
           and store the freshly fetched attributes in the cache.

        Returns a list with one dictionary per attribute (keys 'name',
        'column', 'tapcolumn', 'format', 'ucd', 'null', 'length'), or False
        when get_tapurl_dict() returns nothing.

        Side effects: prints the generated query and stores the request URL,
        the raw response, the TableConverter and the resulting attributes in
        debug_* instance attributes.

        TODO MT M [TAP]:
         - Automatically infer attribute_tapcolumns, attribute_names and
           attribute_columns from the resource. E.g. to create
           sourcelist_data automatically.
        """
        # Return the cache if possible.
        if self.cache_attributes_self and cache:
            return self.cache_attributes_self

        tapdict = self.get_tapurl_dict()
        if not tapdict:
            return False

        # Only one row is needed to learn the column metadata.
        select_clause = ' TOP 1 ' + ', '.join(
            't.' + a for a in tapdict['attributes'])
        qq1 = tapdict['query_template'] % {
            'select_clause': select_clause,
            'where_clause': '',
        }
        # Debug output; the parenthesised form prints the same on Python 2.
        print(qq1)

        trurl = "%(access_url)s?REQUEST=doQuery&LANG=%(language)s&QUERY=%(query)s" % {
            'access_url': tapdict['access_url'],
            'language': tapdict['language'],
            'query': urllib.quote(qq1),
        }

        # Close the connection even when reading fails.
        tu = urllib.urlopen(trurl)
        try:
            tapvot = tu.read()
        finally:
            tu.close()

        tc = TableConverter()
        tc.load_votable_string(tapvot)

        # Keep the intermediate results around for debugging.
        self.debug_attributesfull_trurl = trurl
        self.debug_attributesfull_tapvot = tapvot
        self.debug_attributesfull_tc = tc

        attributes = []
        # zip() stops at the shortest list, so the three lists should have
        # the same length and order.
        for tapcol, col, name in zip(self.attribute_tapcolumns,
                                     self.attribute_columns,
                                     self.attribute_names):
            tapattr = tc.attributes[tapcol]
            attributes.append({
                'name': name,
                'column': col,
                'tapcolumn': tapcol,
                'format': tapattr['format'],
                'ucd': tapattr['ucd'],
                'null': tapattr['null'],
                'length': tapattr['length'],
                # 'original', 'from', 'fromalias'
            })

        self.debug_attributes = attributes

        # Cache the data if requested or if data was cached earlier to ensure
        # it is up to date.
        if cache or self.cache_attributes_self:
            self.cache_attributes_self = attributes

        return attributes

    def get_tapurl_dict(self):
        """
        Return the dictionary from which the TAP request can be generated.

        A new dictionary (with a new, empty 'and_clauses' list) is created on
        every call; the other values are the current attributes of this
        object.
        """
        tapdict = {
            'access_url': self.access_url,
            'query_template': self.query_template,
            'attributes': self.attribute_tapcolumns,
            'attribute_names': self.attribute_names,
            'language': self.language,
            'execution_mode': self.execution_mode,
            'resource': self.resource,
            'and_clauses': [],
            'row_identifiers': self.row_identifiers,
        }
        return tapdict

    def create_empty_datasuper(self):
        """
        Overloaded to set the .row_identifiers of the created TC.

        The TAP column names in self.row_identifiers are translated to the
        corresponding attribute names and stored, in the same order, in
        self.datasuper.row_identifiers.

        Returns self.datasuper.

        Raises KeyError when a row identifier is not one of the attributes
        returned by get_attributes_full().

        Not working properly yet.
        TODO MT H [NOCAT]: Design a generic way to do this.
        """
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(External, self).create_empty_datasuper()

        # get_attributes_full() returns False when there is no TAP dictionary
        # and row_identifiers defaults to None; treat both as empty.
        attrs = self.get_attributes_full(cache=True) or []
        row_identifiers = self.row_identifiers or []

        names_by_tapcolumn = {}
        for attr in attrs:
            if attr['tapcolumn'] in row_identifiers:
                names_by_tapcolumn[attr['tapcolumn']] = attr['name']

        self.datasuper.row_identifiers = [
            names_by_tapcolumn[atc] for atc in row_identifiers]
        return self.datasuper
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment