@inproceedings(vldb:LC94,
  author    = "Wen-Syan Li and Chris Clifton",
  title     = {{Semantic Integration in Heterogeneous Databases Using Neural Networks}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "1--12",
  abstract  = { One important step in integrating heterogeneous databases is matching equivalent attributes: Determining which fields in two databases refer to the same data. The meaning of information may be embodied within a database model, a conceptual schema, application programs, or data contents. Integration involves extracting semantics, expressing them as metadata, and matching semantically equivalent data elements. We present a procedure using a classifier to categorize attributes according to their field specifications and data values, then train a neural network to recognize similar attributes. In our technique, the knowledge of how to match equivalent data elements is ``discovered'' from metadata, not ``pre-programmed''.}
)

% Initials are space-separated (N. B., not N.B.) so BibTeX treats each as its own name token.
@inproceedings(vldb:IGC94,
  author    = "N. B. Idris and W. A. Gray and R. F. Churchhouse",
  title     = {{Providing Dynamic Security Control in a Federated Database}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "13--23",
  abstract  = { When data is being used in a federated database, the aim is to give a loose coupling of the data in the component databases so that a very dynamic and therefore flexible pattern of data sharing can be established. When security integration is performed this flexibility is curtailed by the resultant security level established at integration time which by default is the least upper bound between candidate security levels. Such overclassification of data implies that there will be authorised users who are debarred at the federation level to access the data.
To circumvent this problem there is a need for a dynamic mandate type control for definite periods of the federated system's existence. An approach to establishing such temporary dynamic security control is described in this paper. It is an adaptation of Shamir's method [Shamir79] for sharing a secret, and it aims to let users who are debarred at the default security level from access to particular data, gain access to this data under local control if an appropriate combination of current database administrators of the system are prepared to grant the access dynamically.}
)

@inproceedings(vldb:JD94,
  author    = "Dirk Jonscher and Klaus R. Dittrich",
  title     = {{An Approach for Building Secure Database Federations}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "24--35",
  abstract  = { Database federations give rise to particular security problems which are not present in classical database environments. The problems and solutions heavily depend on the federation's architecture and the degree of heterogeneity of participating component systems. In this paper we discuss a special aspect of security, namely access control for tightly coupled federations. We determine the typical problems to be solved and discuss several solutions providing for different degrees of local autonomy, especially authorisation autonomy. In particular, we describe the interaction between independent reference monitors.
Further, we sketch powerful access control mechanisms to be applied at the global layer and show how they can be mapped onto less powerful mechanisms of component database management systems.}
)

@inproceedings(vldb:HM94a,
  author    = "Waqar Hasan and Rajeev Motwani",
  title     = {{Optimization Algorithms for Exploiting the Parallelism-Communication Tradeoff in Pipelined Parallelism}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "36--47",
  abstract  = { We address the problem of finding parallel plans for SQL queries using the two-phase approach of join ordering followed by parallelization. We focus on the parallelization phase and develop algorithms for exploiting pipelined parallelism. We formulate parallelization as scheduling a weighted operator tree to minimize response time. Our model of response time captures the fundamental tradeoff between parallel execution and its communication overhead. We assess the quality of an optimization algorithm by its {\em performance ratio} which is the ratio of the response time of the generated schedule to that of the optimal. We develop fast algorithms that produce near-optimal schedules -- the performance ratio is extremely close to 1 on the average and has a worst case bound of about 2 for many cases.}
)

% Initials are space-separated (H. V., not H.V.) so BibTeX treats each as its own name token.
@inproceedings(vldb:JLR94,
  author    = "H. V. Jagadish and Daniel Lieuwen and Rajeev Rastogi and Avi Silberschatz and S. Sudarshan",
  title     = {{Dal{\'\i}: A High Performance Main Memory Storage Manager}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "48--59",
  abstract  = { Performance needs of many database applications dictate that the entire database be stored in main memory.
The Dal\'{\i} system is a main memory storage manager designed to provide the persistence, availability and safety guarantees one typically expects from a disk-resident database, while at the same time providing very high performance by virtue of being tuned to support in-memory data. Dal\'{\i} follows the philosophy of treating all data, including system data, uniformly as database files that can be memory mapped and directly accessed/updated by user processes. Direct access provides high performance; slower, but more secure, access is also provided through the use of a server process. Various features of Dal\'{\i} can be tailored to the needs of an application to achieve high performance --- for example, concurrency control and logging can be turned off if not desired, which enables Dal\'{\i} to efficiently support applications that require non-persistent memory resident data to be shared by multiple processes. Both object-oriented and relational databases can be implemented on top of Dal\'{\i}.}
)

% Initials are space-separated (N. L., not N.L.) so BibTeX treats each as its own name token.
@inproceedings(vldb:MS94b,
  author    = "Mukesh K. Mohania and N. L. Sarda",
  title     = {{Some Issues in Design of Distributed Deductive Databases}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "60--71",
  abstract  = { The design of a distributed deductive database system differs from the design of conventional non-distributed deductive database systems in that it requires design of distribution of both the database and rulebase. In this paper, we address the rule allocation problem. We consider minimization of data communication cost during rule execution as a primary basis for rule allocation. The rule allocation problem can be stated in terms of a directed acyclic graph, where nodes represent rules or relations, and edges represent either dependencies between rules or usage of relations by rules.
The arcs are given weights representing volume of data that need to flow between the connected nodes. We show that rule allocation problem is NP-complete. Next, we propose a heuristic for nonreplicated allocation based on successively combining adjacent nodes for placement at same site which are connected by highest weight edge, and study its performance vis-a-vis the enumerative algorithm for optimal allocation. Our results show that the heuristic produces acceptable allocations. We also extend our heuristic for partially replicated allocation.}
)

@inproceedings(vldb:BMCL94,
  author    = "Kurt P. Brown and Manish Mehta and Michael J. Carey and Miron Livny",
  title     = {{Towards Automated Performance Tuning For Complex Workloads}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "72--84",
  abstract  = { In this paper we explore the problem of automatically adjusting DBMS multiprogramming levels and memory allocations in order to achieve a set of per-class response time goals for a complex multiclass workload. We start by describing the phenomena that make this a very challenging problem, the foremost of which is the interdependence between classes that results from their competition for shared resources. We then describe M\&M, a feedback-based algorithm for simultaneously determining the MPL and memory settings for each class independently, and we evaluate the algorithm's effectiveness using a detailed simulation model.
We show that our algorithm can successfully achieve response times that are within a few percent of the goals for mixed workloads consisting of short transactions and longer-running ad hoc join queries.}
)

@inproceedings(vldb:GLPK94,
  author    = "C{\'e}sar Galindo-Legaria and Arjan Pellenkoft and Martin Kersten",
  title     = {{Fast, Randomized Join-Order Selection -- Why Use Transformations?}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "85--95",
  abstract  = { We study the effectiveness of probabilistic selection of join-query evaluation plans, without reliance on tree transformation rules. Instead, each candidate plan is chosen uniformly at random from the space of valid evaluation orders. This leads to a transformation-free strategy where a sequence of random plans is generated and the plans are compared on their estimated costs. The success of this strategy depends on the ratio of ``good'' evaluation plans in the space of alternatives, the efficient generation of random candidates, and an accurate estimation of their cost. To avoid a biased exploration of the space, we solved the open problem of efficiently generating random, uniformly-distributed evaluation orders, for queries with acyclic graphs. This benefits any optimization or sampling scheme in which a random choice of (initial) query plans is required. A direct comparison with iterative improvement and simulated annealing, using a proven cost-evaluator, shows that our transformation-free strategy converges faster and yields solutions of comparable cost.}
)

@inproceedings(vldb:LMS94,
  author    = "Alon Y.
Levy and Inderpal Singh Mumick and Yehoshua Sagiv",
  title     = {{Query Optimization by Predicate Move-Around}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "96--107",
  abstract  = { A new type of optimization, called Predicate Move-around, is introduced. It is shown how this optimization considerably improves the efficiency of evaluating SQL queries that have query graphs with a large number of query blocks (which is a typical situation when queries are defined in terms of multiple views and subqueries). Predicate move-around works by moving predicates across query blocks (in the query graph) that cannot be merged into one block. Predicate move-around is a generalization of and has many advantages over the traditional predicate pushdown. One key advantage arises from the fact that predicate move-around precedes pushdown by pulling predicates up the query graph. As a result, predicates that appear in the query in one part of the graph can be moved around the graph and applied also in other parts of graph. Moreover, predicate move-around optimization can move a wider class of predicates in a wider class of queries as compared to the standard predicate-pushdown techniques. In addition to the usual comparison and arithmetic predicates, other predicates that can be moved around are the EXISTS and NOT EXISTS clauses, the EXCEPT clause, and functional dependencies. The proposed optimization can also move predicates through aggregation. Moreover, the method can also infer new predicates when existing predicates are moved through aggregation or when certain functional dependencies are known to hold.
Finally, the predicate move-around algorithm is easy to implement on top of existing query optimizers.}
)

@inproceedings(vldb:ABDS94,
  author    = "Eric Amiel and Marie-Jo Bellosta and Eric Dujardin and Eric Simon",
  title     = {{Supporting Exceptions to Behavioral Schema Consistency to Ease Schema Evolution in OODBMS}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "108--119",
  abstract  = { Object-oriented databases enforce schema consistency rules to guarantee type safety, i.e., that no run-time type error can occur. When the schema must evolve, some schema updates may violate these rules. In order to maintain complete schema consistency, traditional solutions require significant changes to the types, the type hierarchy and the code of existing methods. Such operations are very expensive in a database context. To ease schema evolution, we propose to support exceptions to the consistency rules without sacrificing type safety for all that. The basic idea is to detect unsafe statements at compile-time and check them at run-time. The run-time check is performed by a specific clause that is automatically inserted around unsafe statements. This check clause warns the programmer of the safety problem and lets him provide exception-handling code. This way, some schema updates can be performed with only minor changes to the code of methods.}
)

@inproceedings(vldb:WN94,
  author    = "Janet L. Wiener and Jeffrey F. Naughton",
  title     = {{Bulk Loading into an OODB: A Performance Study}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "120--131",
  abstract  = { Object-oriented database (OODB) users bring with them large quantities of legacy data (megabytes and even gigabytes). In addition, scientific OODB users continually generate new data. All this data must be loaded into the OODB.
Every relational database system has a load utility, but most OODBs do not. The process of loading data into an OODB is complicated by inter-object references, or relationships, in the data. These relationships are expressed in the OODB as object identifiers, which are not known at the time the load data is generated; they may contain cycles; and there may be implicit system-maintained inverse relationships that must also be stored. We introduce seven algorithms for loading data into an OODB that examine different techniques for dealing with circular and inverse relationships. We present a performance study based on both an analytic model and an implementation of all seven algorithms on top of the Shore object repository. Our study demonstrates that it is important to choose a load algorithm carefully; in some cases the best algorithm achieved an improvement of one to two orders of magnitude over the naive algorithm.}
)

@inproceedings(vldb:CCS94,
  author    = "C. Collet and T. Coupaye and T. Svensen",
  title     = {{NAOS -- Efficient and Modular Reactive Capabilities in an Object-Oriented Database System}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "132--143",
  abstract  = { This paper describes the design and implementation of NAOS, an active rule component in the object-oriented database system O$_{2}$. The contribution of this work is related to two main aspects. The first concerns the integration of the rule concept within the O$_{2}$~model, providing a way to structure applications. Rules are part of a schema and do not belong to a class. Program execution and data manipulation, including method calls, can be driven on rules. The second aspect concerns the way NAOS interacts with the kernel of the O$_{2}$~system. To support a reactive capability the object manager semantics has been extended, thus providing an efficient event detection.
Applications produce events and the subscribed event types react to these events. As a result, rules are triggered.}
)

@inproceedings(vldb:NH94,
  author    = "Raymond T. Ng and Jiawei Han",
  title     = {{Efficient and Effective Clustering Methods for Spatial Data Mining}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "144--155",
  abstract  = { Spatial data mining is the discovery of interesting relationships and characteristics that may exist implicitly in spatial databases. In this paper, we explore whether clustering methods have a role to play in spatial data mining. To this end, we develop a new clustering method called CLARANS which is based on randomized search. We also develop two spatial data mining algorithms that use CLARANS. Our analysis and experiments show that with the assistance of CLARANS, these two algorithms are very effective and can lead to discoveries that are difficult to find with current spatial data mining algorithms. Furthermore, experiments conducted to compare the performance of CLARANS with that of existing clustering methods show that CLARANS is the most efficient.}
)

@inproceedings(vldb:HS94,
  author    = "Erik G. Hoel and Hanan Samet",
  title     = {{Performance of Data-Parallel Spatial Operations}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "156--167",
  abstract  = { The performance of data-parallel algorithms for spatial operations using data-parallel variants of the bucket PMR quadtree, R-tree, and R$^+$-tree spatial data structures is compared. The studied operations are data structure build, polygonization, and spatial join in an application domain consisting of planar line segment data. The algorithms are implemented using the scan model of parallel computation on the hypercube architecture of the Connection Machine.
The results of experiments reveal that the bucket PMR quadtree outperforms both the R-tree and R$^+$-tree. This is primarily because the bucket PMR quadtree yields a regular disjoint decomposition of space while the R-tree and R$^+$-tree do not. The regular disjoint decomposition increases the potential for interprocessor communication and parallelism in the bucket PMR quadtree, thereby enabling the execution times to decrease relative to those needed by the R-tree and R$^+$-tree.}
)

@inproceedings(vldb:BK94,
  author    = "Thomas Brinkhoff and Hans-Peter Kriegel",
  title     = {{The Impact of Global Clustering on Spatial Database Systems}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "168--179",
  abstract  = { Global clustering has rarely been investigated in the area of spatial database systems although dramatic performance improvements can be achieved by using suitable techniques. In this paper, we propose a simple approach to global clustering called cluster organization. We will demonstrate that this cluster organization leads to considerable performance improvements without any algorithmic overhead. Based on real geographic data, we perform a detailed empirical performance evaluation and compare the cluster organization to other organization models not using global clustering. We will show that global clustering speeds up the processing of window queries as well as spatial joins without decreasing the performance of the insertion of new objects and of selective queries such as point queries.
The spatial join is sped up by a factor of about 4, whereas non-selective window queries are accelerated by even higher speed up factors.}
)

@inproceedings(vldb:KM94,
  author    = "Christoph Kilger and Guido Moerkotte",
  title     = {{Indexing Multiple Sets}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "180--191",
  abstract  = { We examine the performance of B+-tree based index structures for multiple sets, as developed in the context of object bases. Index structures for multiple sets can be classified into those that group entries according to their key value and those that group entries according to their set membership. The former are particularly suited for exact match queries on all indexed sets, the latter especially support range queries on a small number of all indexed sets. The goal is to thoroughly evaluate the performance of both grouping strategies. There exist two good reasons for adding a new index structure to the evaluation: \begin{itemize} \item The performance potentials of set grouping index structures are not yet fully exploited. \item Up to now, the database administrator has to choose between the key grouping and the set grouping index structures. If the application profile consists of both, exact match queries and range queries, this is not really a choice. Hence, a more flexible index structure is needed which can be tuned to a given mix containing both, exact match and range queries. \end{itemize} These two reasons led us to the development of the CG-tree. The focus of the paper is on introducing the CG-tree and on a thorough analysis of the performance of the CH-index, H-tree, and CG-tree under various conditions.}
)

@inproceedings(vldb:BCC94,
  author    = "Eric W. Brown and James P. Callan and W.
Bruce Croft",
  title     = {{Fast Incremental Indexing for Full-Text Information Retrieval}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "192--202",
  abstract  = { Full-text information retrieval systems have traditionally been designed for archival environments. They often provide little or no support for adding new documents to an existing document collection, requiring instead that the entire collection be re-indexed. Modern applications, such as information filtering, operate in dynamic environments that require frequent additions to document collections. We provide this ability using a traditional inverted file index built on top of a persistent object store. The data management facilities of the persistent object store are used to produce efficient incremental update of the inverted lists. We describe our system and present experimental results showing superior incremental indexing and competitive query processing performance. Keywords: full-text document retrieval, incremental indexing, persistent object store, performance}
)

@inproceedings(vldb:SS94,
  author    = "B. Sreenath and S. Seshadri",
  title     = {{The hcC-tree: An Efficient Index Structure for Object Oriented Databases}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "203--213",
  abstract  = { Object oriented database systems, in contrast to traditional relational database systems, allow the scope of a query against a class to be either the class itself or all classes in the class hierarchy rooted at the class. If object oriented databases have to achieve acceptable performance levels against such queries, we need indexes that support efficient retrieval of instances from a single class as well as from all the classes in a class hierarchy.
In this paper, we propose a new index structure called hcC-tree (hierarchy class Chain tree) that supports both kinds of retrieval efficiently. Moreover, the update cost of the index structure is bounded by the height of the hcC-tree (which is usually never more than four). We have implemented hcC-trees along with H-trees and CH-trees (two other index structures that have been proposed in the literature) and report a detailed performance analysis of the three structures. The performance study reveals that hcC-trees perform much better than the other two structures under most circumstances. The balanced behaviour of hcC-tree under all kinds of queries and in the presence of updates shows that it is a promising index structure for the future.}
)

@inproceedings(vldb:cFC94,
  author    = "Ada Wai-chee Fu and David Wai-Lok Cheung",
  title     = {{A Transaction Replication Scheme for a Replicated Database with Node Autonomy}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "214--225",
  abstract  = { Many proposed protocols for replicated databases consider centralized control of each transaction so that given a transaction, some site will monitor the remote data access and transaction commit. We consider the approach of broadcasting transactions to remote sites and handling these transactions in their complete form at each site. We consider data of two types: shared-private data and public data and show that transactions working only on shared-private data can be executed under a local concurrency control protocol. We assume a synchronized network with possibilities of partition failures. We show that in our scheme transaction execution can be managed with less communication delay compared to centralized transaction control.}
)

% Initials are space-separated (M. A., not M.A.) so BibTeX treats each as its own name token.
@inproceedings(vldb:OAB94,
  author    = "M. Ouzzani and M. A. Atroun and N. L.
Belkhodja",
  title     = {{A Top-Down Approach for Two Level Serializability}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "226--237",
  abstract  = { Concurrency control has received considerable attention in multidatabase systems because of their characteristics such as heterogeneity and autonomy. Particularly, various concurrency control protocols have been developed in the literature. In this paper, we present a protocol that guarantees the two level serializability criterion and built up according to the top down approach.}
)

% Initials are space-separated (V. W., not V.W.) so BibTeX treats each as its own name token.
@inproceedings(vldb:SZ94,
  author    = "V. W. Setzer and A. Zisman",
  title     = {{New Concurrency Control Algorithms for Accessing and Compacting B-Trees}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "238--248",
  abstract  = { This paper initially presents a brief but fairly exhaustive survey of solutions to the concurrency control problem for B-trees. We then propose a new solution, which is characterized by the use of variable-length indices, the employment of a single lock type for the usual access operations and preemptive splits as well as delayed catenations and subdivisions. We also introduce a new compaction algorithm and its concurrent execution, using a new lock type.}
)

% Initials are space-separated (N. H., not N.H.) so BibTeX treats each as its own name token.
@inproceedings(vldb:GJR94,
  author    = "N. H. Gehani and H. V. Jagadish and W. D. Roome",
  title     = {{OdeFS: A File System Interface to an Object-Oriented Database}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "249--260",
  abstract  = { OdeFS is a file-like interface to the Ode object-oriented database. OdeFS allows database objects to be accessed and manipulated with standard commands, just like files in a traditional file system.
No recompilation is required, so proprietary applications can access Ode objects. OdeFS is implemented as a network file server, using the NFS protocol. This paper describes OdeFS and its implementation.}
)

@inproceedings(vldb:FMZ94,
  author    = "Fabrizio Ferrandina and Thorsten Meyer and Roberto Zicari",
  title     = {{Implementing Lazy Database Updates for an Object Database System}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "261--272",
  abstract  = { Current object database management systems support user-defined conversion functions to update the database once the schema has been modified. Two main strategies are possible when implementing such database conversion functions: immediate or lazy database updates. In this paper, we concentrate our attention to the definition of implementation strategies for conversion functions implemented as lazy database updates.}
)

@inproceedings(vldb:FLU94,
  author    = "J{\"u}rgen Frohn and Georg Lausen and Heinz Uphoff",
  title     = {{Access to Objects by Path Expressions and Rules}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "273--284",
  abstract  = { Object oriented databases provide rich structuring capabilities to organize the objects being relevant for a given application. Due to the possible complexity of object structures, path expressions have become accepted as a concise syntactical means to reference objects. Even though known approaches to path expressions provide quite elegant access to objects, there seems to be still a need for more generality. To this end, the rule-language PathLog is introduced. A first contribution of PathLog is to add a second dimension to path expressions in order to increase conciseness. In addition, a path expression can also be used to reference virtual objects.
Both enhancements give rise to interesting semantic implications.}
)

@inproceedings(vldb:TPC94,
  author    = "M. Teisseire and P. Poncelet and R. Cicchetti",
  title     = {{Towards Event-Driven Modelling for Database Design}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "285--296",
  abstract  = { This paper is devoted to the dynamic aspect of the IFO2 conceptual model, an extension of the semantic IFO model defined by S. Abiteboul and R. Hull. Its original aspects are a ``whole-event'' approach, the use of constructors to express combinations of events, and the modularity and re-usability of specifications in order to optimize the designer's work. Furthermore, it offers an overview of the represented behaviour. To complement the modelling part, IFO2 includes a derivation component which performs the implementation of specifications by using an active DBMS.}
)

@inproceedings(vldb:Gut94,
  author    = "Ralf Hartmut G{\"u}ting",
  title     = {{GraphDB: Modeling and Querying Graphs in Databases}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "297--308",
  abstract  = { We propose a data model and query language that integrates an explicit modeling and querying of graphs smoothly into a standard database environment. For standard applications, some key features of object-oriented modeling are offered such as object classes organized into a hierarchy, object identity, and attributes referencing objects. Querying can be done in a familiar style with a derive statement that can be used like a \mbox{select \ldots from \ldots where}.
On the other hand, the model allows for an explicit representation of graphs by partitioning object classes into simple classes, link classes, and path classes whose objects can be viewed as nodes, edges, and explicitly stored paths of a graph (which is the whole database instance). For querying graphs, the derive statement has an extended meaning in that it allows one to refer to subgraphs of the database graph. A powerful rewrite operation is offered for the manipulation of heterogeneous sequences of objects which often occur as a result of accessing the database graph. Additionally there are special graph operations like determining a shortest path or a subgraph and the model is extensible by such operations. Besides being attractive for standard applications, the model permits a natural representation and sophisticated querying of networks, in particular of spatially embedded networks like highways, public transport, etc.}
)

@inproceedings(vldb:GL94,
  author    = "Terry Gaasterland and Jorge Lobo",
  title     = {{Qualified Answers That Reflect User Needs and Preferences}},
  booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases",
  year      = 1994,
  address   = "Santiago, Chile",
  pages     = "309--320",
  abstract  = { This paper introduces a formalism to describe the needs and preferences of database users. Because of the precise formulation of these concepts, we have found an automatic and {\em very simple} mechanism to incorporate user needs and preferences into the query answering process. In the formalism, the user provides a lattice of domain independent values that define preferences and needs and a set of domain specific {\em user constraints} qualified with lattice values. The constraints are automatically incorporated into a relational or deductive database through a series of syntactic transformations that produces an annotated deductive database.
Query answering procedures for deductive databases are then used, with minor modifications, to obtain annotated answers to queries. Because preference declaration is separated from data representation and management, preferences can be easily altered without touching the database. Also, the query language allows users to ask for answers at different preference levels. An extended example shows how these methods are used to handle large quantities of DNA sequence data.} ) @inproceedings( vldb:MCD94, author = "Maur{\'\i}cio R. Mediano and Marco A. Casanova and Marcelo Dreux", title = {{V-Trees -- A Storage Method for Long Vector Data}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "321--330", abstract = { This paper introduces a new data structure, called {\em V-trees}, designed to store long sequences of points in 2D space and yet allow efficient access to their fragments. They also optimize access to a sequence of points when the query involves changes to a smaller scale. V-trees operate in much the same way as positional B-trees do in the context of long fields and they can be viewed as a variant of R-trees. The design of V-trees was motivated by the problem of storing and retrieving geographic objects that are fairly long, such as river margins or political boundaries, and the fact that geographic queries typically access just fragments of such objects, frequently using a smaller scale.} ) @inproceedings( vldb:CCY94, author = "Sudarshan S. Chawathe and Ming-Syan Chen and Philip S. 
Yu", title = {{On Index Selection Schemes for Nested Object Hierarchies}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "331--341", abstract = { In this paper we address the problem of devising a set of indexes for a nested object hierarchy in an object-oriented database to improve the overall system performance. It is noted that the effects of two indexes could be entangled in that the inclusion of one index might affect the benefit achievable by the other index. Such a phenomenon is termed index interaction. Clearly, the effect of index interaction needs to be taken into consideration when a set of indexes is being built. The index selection problem is first formulated and four index selection algorithms are evaluated via simulation. The effects of different objective functions, which guide the search in the index selection algorithms, are also investigated. It is shown by simulation results that the greedy algorithm which is devised in light of the phenomenon of index interaction performs fairly well in most cases. Sensitivity analysis for various database parameters is conducted.} ) @inproceedings( vldb:LNS94, author = "W. Litwin and M-A Neimat and D. Schneider", title = {{RP*: A Family of Order Preserving Scalable Distributed Data Structures}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "342--353", abstract = { Hash-based scalable distributed data structures (SDDSs), like LH* and DDH, for networks of interconnected computers (multicomputers) were shown to open new perspectives for file management. We propose a family of ordered SDDSs, called RP*, providing for ordered and dynamic files on multicomputers, and thus for more efficient processing of range queries and of ordered traversals of files. 
The basic algorithm, termed RP*N, builds the file with the same key space partitioning as a B-tree, but avoids indexes through the use of multicast. The algorithms, RP*C and RP*S enhance throughput for faster networks, adding indexes on clients, or on clients and servers, while either decreasing or avoiding multicast. RP* files are shown highly efficient with access performance exceeding traditional files by an order of magnitude or two, and, for non-range queries, very close to LH*.} ) @inproceedings( vldb:CS94, author = "Surajit Chaudhuri and Kyuseok Shim", title = {{Including Group-By in Query Optimization}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "354--366", abstract = { In existing relational database systems, processing of group-by and computation of aggregate functions are always postponed until all joins are performed. In this paper, we present transformations that make it possible to push group-by operation past one or more joins and can potentially reduce the cost of processing a query significantly. Therefore, the placement of group-by should be decided based on cost estimation. We explain how the traditional System-R style optimizers can be modified by incorporating the ``greedy conservative heuristic'' that we developed. We prove that applications of greedy conservative heuristic produce plans that are better (or no worse) than the plans generated by a traditional optimizer. Our experimental study shows that the extent of improvement in the quality of plans is significant with only a modest increase in optimization cost. Our technique also applies to optimization of ``Select Distinct'' queries by pushing down duplicate elimination in a cost-based fashion.} ) @inproceedings( vldb:TSI94, author = "Odysseas G. Tsatalos and Marvin H. Solomon and Yannis E.
Ioannidis", title = {{The GMAP: A Versatile Tool for Physical Data Independence}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "367--378", abstract = { Physical data independence is touted as a central feature of modern database systems. Both relational and object-oriented systems, however, force users to frame their queries in terms of a logical schema that is directly tied to physical structures. Our approach eliminates this dependence. All storage structures are defined in a declarative language based on relational algebra as functions of a logical schema. We present an algorithm, integrated with a conventional query optimizer, that translates queries over this logical schema into plans that access the storage structures. We also show how to compile update requests into plans that update all relevant storage structures consistently and optimally. Finally, we report on experiments with a prototype implementation of our approach that demonstrate how it allows storage structures to be tuned to the expected or observed workload to achieve significantly better performance than is possible with conventional techniques.} ) @inproceedings( vldb:DG94, author = "Diane L. Davison and Goetz Graefe", title = {{Memory-Contention Responsive Hash Joins}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "379--390", abstract = { In order to maximize system performance in environments with fluctuating memory contention, memory-intensive algorithms such as hash join must gracefully adapt to variations in available memory. Mixed workloads, creating fluctuations of erratic frequency and magnitude, make responsiveness to memory contention particularly important. 
Previous studies on adaptable hash joins have focused on lowering I/O costs by reducing the I/O volume, as measured in the number of pages, by spilling partitions from memory to disk and then restoring them into memory if more memory becomes available. In this paper, we present memory-contention responsive hash joins that (i) reduce the amount of time spent on I/O by using large I/O buffers, or clusters, (ii) dynamically vary the cluster size in response to fluctuations in memory availability, and (iii) employ earlier techniques of dynamic destaging and restoration. Our simulation results demonstrate that these combined techniques provide better performance than previous algorithms, particularly in environments with medium to high memory contention or with very frequent changes in memory availability.} ) @inproceedings( vldb:GPST94, author = "Alejandro Guti{\'e}rrez and Philippe Pucheral and Hermann Steffen and Jean-Marc Th{\'e}venin", title = {{Database Graph Views: A Practical Model to Manage Persistent Graphs}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "391--402", abstract = { Advanced technical applications like routing systems or electrical network management systems introduce the need for complex manipulations of large size graphs. Efficiently supporting this requirement is now regarded as a key feature of future database systems. This paper proposes an abstraction mechanism, called Database Graph View, to define and manipulate various kinds of graphs stored in either relational, object oriented or file systems. A database graph view provides a functional definition of a graph which allows its manipulation independently of its physical organization. Derivation operators are proposed to define new graph views upon existing ones. 
These operators permit the composition, in a single graph view, of graphs having different node and edge types and different implementations. The graph view mechanism comes with an execution model where both set-oriented and pipelined execution of graph operators can be expressed. The graph view operators form a library which can be integrated in database systems or applications managing persistent data with no repercussion on the data organization.} ) @inproceedings( vldb:MS94a, author = "Florian Matthes and Joachim W. Schmidt", title = {{Persistent Threads}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "403--414", abstract = { Persistent threads are a database programming concept particularly well-suited for applications that manage long-term, distributed or cooperative activities. We introduce persistent threads as a novel form of bindings from data in persistent object stores to activated code and relate them to existing binding concepts found in database programming. We also describe the integration of persistent threads into a polymorphically-typed database language and its supporting layered system architecture with particular emphasis on abstractly-defined thread representations which support thread analysis, optimization and portability.} ) @inproceedings( vldb:PS94, author = "Alexandra Poulovassilis and Carol Small", title = {{Investigation of Algebraic Query Optimisation Techniques for Database Programming Languages}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "415--426", abstract = { A major challenge still facing the designers and implementors of database programming languages (DBPLs) is that of query optimisation. 
We investigate algebraic query optimisation techniques for DBPLs in the context of a purely declarative functional language that supports sets as first-class objects. Since the language is computationally complete issues such as non-termination of expressions and construction of infinite data structures can be investigated, whilst its declarative nature allows the issue of side effects to be avoided and a richer set of equivalences to be developed. The support of a set bulk data type enables much prior work on the optimisation of relational languages to be utilised. Finally, the language has a well-defined semantics which permits us to reason formally about the properties of expressions, such as their equivalence with other expressions and their termination.} ) @inproceedings( vldb:KK94, author = "Alfons Kemper and Donald Kossmann", title = {{Dual-Buffering Strategies in Object Bases}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "427--438", abstract = { In this work, control strategies for combining two potentially powerful buffer management techniques in object bases were devised and evaluated: (1) buffer pool segmentation with segment-specific replacement criteria, and (2) dual buffering consisting of copying objects from pages into object buffers. Two dimensions exist for exerting control on the buffer pool: (1) the {\it copying\/} time which determines at what time objects are copied from their memory-resident home page, and (2) the {\it relocation\/} time which determines when a (copied) object is to be transferred back to its home page. Along both dimensions, it is possible to differentiate between an {\it eager\/} and a {\it lazy\/} strategy. 
The extensive experimental results indicate that lazy object copying combined with an eager relocation strategy is almost always superior and significantly outperforms page-based buffering in most applications.} ) @inproceedings( vldb:JS94, author = "Theodore Johnson and Dennis Shasha", title = {{2Q: A Low Overhead High Performance Buffer Management Replacement Algorithm}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "439--450", abstract = { In a path-breaking paper last year Pat and Betty O'Neil and Gerhard Weikum proposed a self-tuning improvement to the Least Recently Used (LRU) buffer management algorithm[OOW93]. Their improvement is called LRU/k and advocates giving priority to buffer pages based on the kth most recent access. (The standard LRU algorithm is denoted LRU/1 according to this terminology.) If P1's kth most recent access is more recent than P2's, then P1 will be replaced after P2. Intuitively, LRU/k for k $>$ 1 is a good strategy, because it gives low priority to pages that have been scanned or to pages that belong to a big randomly accessed file (e.g., the account file in TPC/A). They found that LRU/2 achieves most of the advantage of their method. The one problem of LRU/2 is the processor overhead to implement it. In contrast to LRU, each page access requires log N work to manipulate a priority queue where N is the number of pages in the buffer. Question: is there a low overhead way (constant overhead per access as in LRU) to achieve similar page replacement performance to LRU/2? Answer: Yes. Our ``Two Queue'' algorithm (hereafter 2Q) has constant time overhead, performs as well as LRU/2, and requires no tuning. These results hold for real (DB2 commercial, Swiss bank) traces as well as simulated ones.
Based on these experiments, we estimate that 2Q will provide a few percent improvement over LRU without increasing the overhead by more than a constant additive factor.} ) @inproceedings( vldb:NY94, author = "Raymond T. Ng and Jinhai Yang", title = {{Maximizing Buffer and Disk Utilizations for News On-Demand}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "451--462", abstract = { In this paper, we study the problem of how to maximise the throughput of a multimedia system, given a fixed amount of buffer space and disk bandwidth both pre-determined at design time. Our approach is to maximize the utilizations of disk and buffers. We propose doing so in two ways. First, we analyse a scheme that allows multiple streams to share buffers. Our analysis and preliminary simulation results indicate that buffer sharing could lead to as much as 50\% reduction in total buffering requirements. Second, we develop two prefetching strategies: SP and IP. As will be demonstrated by SP, straightforward prefetching is not effective at all. In contrast, IP, which prefetches more intelligently than does SP, could be valuable in maximising the effective use of buffers and disk. Our preliminary simulation results show that IP could lead to a 40\% improvement in throughput.} ) @inproceedings( vldb:BPF94, author = "Maria L. Barja and Norman W. Paton and Alvaro A.A. Fernandes and M.
Howard Williams and Andrew Dinn", title = {{An Effective Deductive Object-Oriented Database Through Language Integration}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "463--474", abstract = { This paper presents an approach to the development of a practical deductive object-oriented database (DOOD) system based upon the integration of a logic query language with an imperative programming language in the context of an object-oriented data model. The approach is novel, in that a formally defined data model has been used as the starting point for the development of the two languages. This has enabled a seamless integration of the two languages, which is the central theme of this paper. It is shown how the two languages have been developed from the underlying data model, and several alternative approaches to their integration are presented, one of which has been chosen for implementation. The approach is compared with other examples of language integration in a database context, and it is argued that the resulting system overcomes a number of important challenges associated with the development of practical deductive object-oriented database systems.} ) @inproceedings( vldb:BW94, author = "Elena Baralis and Jennifer Widom", title = {{An Algebraic Approach to Rule Analysis in Expert Database Systems}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "475--486", abstract = { Expert database systems extend the functionality of conventional database systems by providing a facility for creating and automatically executing Condition-Action rules. While Condition-Action rules in database systems are very powerful, they also can be very difficult to program, due to the unstructured and unpredictable nature of rule processing. 
We provide methods for static analysis of Condition-Action rules; our methods determine whether a given rule set is guaranteed to terminate, and whether rule execution is confluent (has a guaranteed unique final state). Our methods are based on previous methods for analyzing rules in active database systems. We improve considerably on the previous methods by providing analysis criteria that are much less conservative: our methods often determine that a rule set will terminate or is confluent when previous methods could not. Our improved analysis is based on a ``propagation'' algorithm, which uses a formal approach based on an extended relational algebra to accurately determine when the action of one rule can affect the condition of another. Our algebraic approach yields methods that are applicable to a broad class of expert database rule languages.} ) @inproceedings( vldb:AS94, author = "Rakesh Agrawal and Ramakrishnan Srikant", title = {{Fast Algorithms for Mining Association Rules in Large Databases}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "487--499", abstract = { We consider the problem of discovering association rules between items in a large database of sales transactions. We present two new algorithms for solving this problem that are fundamentally different from the known algorithms. Empirical evaluation shows that these algorithms outperform the known algorithms by factors ranging from three for small problems to more than an order of magnitude for large problems. We also show how the best features of the two proposed algorithms can be combined into a hybrid algorithm, called AprioriHybrid. Scale-up experiments show that AprioriHybrid scales linearly with the number of transactions.
AprioriHybrid also has excellent scale-up properties with respect to the transaction size and the number of items in the database.} ) @inproceedings( vldb:KF94, author = "Ibrahim Kamel and Christos Faloutsos", title = {{Hilbert R-tree: An Improved R-tree using Fractals}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "500--509", abstract = { We propose a new R-tree structure that outperforms all the older ones. The heart of the idea is to facilitate the deferred splitting approach in R-trees. This is done by proposing an ordering on the R-tree nodes. This ordering has to be `good', in the sense that it should group `similar' data rectangles together, to minimize the area and perimeter of the resulting minimum bounding rectangles (MBRs). Following [Kamel93] we have chosen the so-called `2D-c' method, which sorts rectangles according to the Hilbert value of the center of the rectangles. Given the ordering, every node has a well-defined set of sibling nodes; thus, we can use deferred splitting. By adjusting the split policy, the Hilbert R-tree can achieve as high utilization as desired. To the contrary, the R*-tree has no control over the space utilization, typically achieving up to 70\%. We designed the manipulation algorithms in detail, and we did a full implementation of the Hilbert R-tree. Our experiments show that the `2-to-3' split policy provides a compromise between the insertion complexity and the search cost, giving up to 28\% savings over the R*-tree [Beckmann90] on real data.} ) @inproceedings( vldb:SKN94, author = "Ambuj Shatdal and Chander Kant and Jeffrey F. 
Naughton", title = {{Cache Conscious Algorithms for Relational Query Processing}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "510--521", abstract = { The current main memory (DRAM) access speeds lag far behind CPU speeds. Cache memory, made of static RAM, is being used in today's architectures to bridge this gap. It provides access latencies of 2-4 processor cycles, in contrast to main memory which requires 15-25 cycles. Therefore, the performance of the CPU depends upon how well the cache can be utilized. We show that there are significant benefits in redesigning our traditional query processing algorithms so that they can make better use of the cache. The new algorithms run 8\%-200\% faster than the traditional ones.} ) @inproceedings( vldb:XH94, author = "Zhaohui Xie and Jiawei Han", title = {{Join Index Hierarchies for Supporting Efficient Navigations in Object-Oriented Databases}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "522--533", abstract = { A join index hierarchy method is proposed to handle the ``goto's on disk'' problem in object-oriented query processing. The method constructs a hierarchy of join indices and transforms a sequence of pointer chasing operations into a simple search in an appropriate join index file, and thus accelerates navigation in object-oriented databases. The method extends the join index structure studied in relational and spatial databases, supports both forward and backward navigations among objects and classes, and localizes update propagations in the hierarchy. Our performance study shows that partial join index hierarchy outperforms several other indexing mechanisms in object-oriented query processing.} ) @inproceedings( vldb:SW94, author = "S. Sripada and B. 
W{\"u}thrich", title = {{Cumulative Updates}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "534--545", abstract = { When an update to a view is requested by a user, there may be no unique way of updating the stored relations in the database to realize the requested update. Choosing one of the alternatives for updating stored relations may not reflect the change that has actually taken place in the real world; in the presence of other derived views, the database may actually present a very wrong model of the world to the user. The problem is even more severe in the case of deductive databases. For avoiding this problem, we introduce a new notion of view updates, called {\em cumulative updates}. The key idea behind cumulative updates is that update mechanisms should wait for further update requests to resolve ambiguities. Equivalently, current update requests must also take into account previous requests made to the knowledge base. Cumulative updates, therefore, subsume conventional updates in which only the current update request is considered. In this paper, we motivate the need for cumulative updates and formally define the notion of such updates as well as the different classes therein. We then give methods for computing one particular class of cumulative updates.} ) @inproceedings( vldb:LOT94, author = "Hongjun Lu and Beng-Chin Ooi and Kian-Lee Tan", title = {{On Spatially Partitioned Temporal Join}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "546--557", abstract = { This paper presents an innovative partition-based time join strategy for temporal databases where time is represented by time intervals. The proposed method maps time intervals to points in a two dimensional space and partitions the space into subspaces.
Tuples of a temporal relation are clustered into partitions based on the mapping in the space. As a result, when two temporal relations are to be joined over the time attribute, a partition in one relation only needs to be compared with a predetermined set of partitions of the other relation. The mapping scheme and the join algorithms are described. The use of spatial indexing techniques to support direct access to the stored partitions is discussed. The results of a preliminary performance study indicate the efficiency of the proposed method.} ) @inproceedings( vldb:DKL94, author = "David J. DeWitt and Navin Kabra and Jun Luo and Jignesh M. Patel and Jie-Bing Yu", title = {{Client-Server Paradise}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "558--569", abstract = { This paper describes the design and implementation of Paradise, a database system designed for handling GIS type of applications. The current version of Paradise uses a client-server architecture and provides an extended-relational data model for modeling GIS applications. Paradise supports an extended version of SQL and provides a graphical user interface for querying and browsing the database. We also describe the results of benchmarking Paradise using the Sequoia 2000 storage benchmark.} ) @inproceedings( vldb:SYH94, author = "A. Prasad Sistla and Clement Yu and R. Haddad", title = {{Reasoning About Spatial Relationships in Picture Retrieval Systems}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "570--581", abstract = { In this paper, we consider various spatial relationships that are of general interest in pictorial database systems. We present a set of rules that allow us to deduce new relationships from a given set of relationships.
A deductive mechanism using these rules can be used in query processing systems that retrieve pictures by content. The given set of rules are shown to be sound, i.e. the deductions are logically correct. The rules are also shown to be complete for three dimensional systems, i.e. every relationship which is implied by a given consistent set of relationships F is deducible from F using the given rules. In addition, we show that the given set of rules is incomplete for two dimensional systems.} ) @inproceedings( vldb:Chi94, author = "Tzi{-}cker Chiueh", title = {{Content-Based Image Indexing}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "582--593", abstract = { We formulate the content-based image indexing problem as a multi-dimensional nearest-neighbor search problem, and develop/implement an optimistic vantage-point tree algorithm that can dynamically adapt the indexed search process to the characteristics of given queries. Based on our performance study, the system typically only needs to touch less than 20\% of the index entries for well-behaved queries, i.e., when the query images are relatively close to their nearest neighbors in the database. 
We also report in this paper the results of extensive performance experiments, which characterize the impacts of various configuration and workload parameters on the performance of the proposed algorithm.} ) @inproceedings( vldb:OBRS94, author = "Banu {\"O}zden and Alexandros Biliris and Rajeev Rastogi and Avi Silberschatz", title = {{A Low-Cost Storage Server for Movie on Demand Databases}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "594--605", abstract = { With recent advances in storage and network technology it is now possible to provide ``movie on demand'' (MOD) service, eliminating the inflexibility inherent in today's broadcast cable systems. A MOD server is a computer system that stores movies in compressed digital form and provides support for different portions of compressed movie data to be accessed and transmitted concurrently. In this paper, we present a low-cost storage architecture for a MOD server that relies principally on disks. The high bandwidths of disks in conjunction with a clever strategy for striping movies on them is utilized in order to enable simultaneous access and transmission of ``certain'' different portions of a movie. We also present a wide range of schemes for implementing VCR-like functions.} ) @inproceedings( vldb:CKAK94, author = "S. Chakravarthy and V. Krishnaprasad and E. Anwar and S.-K. Kim", title = {{Composite Events for Active Databases: Semantics, Contexts and Detection}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "606--617", abstract = { Making a database system active entails developing an expressive event specification language with well-defined semantics, algorithms for the detection of composite events, and an architecture for an event detector along with its implementation.
This paper presents the semantics of composite events using the notion of a global event history (or a global event log). Parameter contexts are introduced and precisely defined to facilitate efficient management and detection of composite events. Finally an architecture and the implementation of a composite event detector is analysed in the context of an object-oriented active DBMS.} ) @inproceedings( vldb:SABdB94, author = "Hennie J. Steenhagen and Peter M.G. Apers and Henk M. Blanken and Rolf A. de By", title = {{From Nested-Loop to Join Queries in OODB}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "618--629", abstract = { Most declarative SQL-like query languages for object-oriented database systems are orthogonal languages allowing for arbitrary nesting of expressions in the \mbox{{\bf select-},} \mbox{{\bf from-},} and {\bf where-}clause. Expressions in the {\bf from-}clause may be base tables as well as set-valued attributes. In this paper, we propose a general strategy for the optimization of nested OOSQL queries. As in the relational model, the translation/optimization goal is to move from tuple- to set-oriented query processing. Therefore, OOSQL is translated into the algebraic language ADL, and by means of algebraic rewriting nested queries are transformed into join queries as far as possible. Four different optimization options are described, and a strategy to assign priorities to options.} ) @inproceedings( vldb:PZMY94, author = "Alain Pirotte and Esteban Zim{\'a}nyi and David Massart and Tatiana Yakusheva", title = {{Materialization: A Powerful and Ubiquitous Abstraction Pattern}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "630--641", abstract = { Materialization is a useful abstraction pattern that can be identified in many application settings. 
Intuitively, materialization is the relationship between a class of categories (e.g., models of cars) and a class of more concrete objects (e.g., individual cars). The talk gives a precise semantic definition of materialization in terms of the usual is-a and is-of abstractions, and of the class/metaclass correspondence. New and powerful inheritance mechanisms are associated with materialization. Examples, properties, and extensions of materialization are also presented. Providing materialization as an abstraction mechanism for conceptual modeling enhances expressiveness by a controlled introduction of classification at the application level.} ) @inproceedings( vldb:MR94, author = "Alice H. Muntz and Christian T. Ramiller", title = {{A Requirement-Based Approach to Data Modeling and Re-Engineering}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "643--654", note = "Invited Talk", abstract = { This paper reports on the managerial experience, technical approach, and lessons learned from re-engineering eight departmental large-scale information systems. The driving strategic objective of each project was to migrate these systems into a set of enterprise-wide systems, which incorporate current and future requirements, drastically reduce operational and maintenance cost, and facilitate common understandings among stakeholders (i.e., policy maker, high-level management, IS developer/maintainer/end-users). A logical data model, which contains requirements, rules, physical data representation as well as logical data object, clearly documents the baseline data requirements implemented by the legacy system and is crucial to achieve this strategic goal.
Re-engineering products are captured in the dictionaries of a CASE tool (i.e., in the form of a business process decomposition hierarchy, as-is data model, normalized logical data model, and linkages among data objects) and are supplemented with traceability matrices in spreadsheets. The re-engineered data products are used as follows: (1) migration of the legacy databases to relational database management systems, (2) automatic generation of databases and applications for migration from mainframes to client-server, (3) enterprise data standardization, (4) integration of disparate information systems, (5) re-documentation, (6) data quality assessment and assurance, and (7) baseline specifications for future systems.} ) @inproceedings( vldb:Les94, author = "Michael Lesk", title = {{Experiments on Access to Digital Libraries: How can Images and Text be Used Together}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "655--667", note = "Invited Talk", abstract = { Should digital libraries be based on image or text display? Which will serve users better? Experience and experiments show that users can employ either, and that there are technical advantages to each format. Often, material in both formats can be used together, and the long-run trend is probably towards ASCII material, even if reached by a circuitous path via images and OCR during a transition.} ) @inproceedings( vldb:RS94, author = "Arnon Rosenthal and Leonard J. Seligman", title = {{Data Integration in the Large: The Challenge of Reuse}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "669--675", note = "Industrial Case", abstract = { Much published research on data integration considers only a ``one shot'' effort to produce an integrated schema or a multidatabase query.
This paper examines a more complex environment. Our clients carry out multiple integration efforts, producing multiple kinds of integrated systems that involve overlapping subsets of their component databases. Metadata is costly to collect and maintain, so one wishes to reuse it wherever possible. We thus must devise ways to reuse integration metadata across integration efforts, though the efforts may have different goals and may concern overlapping subsets of the components. This paper identifies and examines issues of maximizing information and code reuse by organizations facing data integration in the large.} ) @inproceedings( vldb:ABH94, author = "Sarabjot S. Anand and David A. Bell and John G. Hughes", title = {{An Empirical Performance Study of the Ingres Search Accelerator for a Large Property Management Database System}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "676--685", note = "Industrial Case", abstract = { The property management database system under development at the Northern Ireland Housing Executive (NIHE) is one of the largest relational database systems under implementation in Europe. The application system has a high expected transaction processing rate of approximately 37000 transactions per day with approx. 250 on-line users. An implementation on such a large scale requires careful consideration of performance. In this paper we consider the effect of the Ingres Search Accelerator on the transaction processing efficiency of the system. The performance enhancement brought about by SCAFS (ICL's current version of the well-known Content Addressable File Store, CAFS -- the heart of the Ingres Search Accelerator) for different file organisations is assessed. Recommendations on how the performance of SCAFS can be improved by tuning certain parameters are provided.
We also provide a rough guideline as to when the Ingres Query Optimizer ``decides'' to use SCAFS for different file organisations and point out deficiencies in this decision making process. We conclude this paper by recommending techniques that may be employed to increase the role of the Ingres Search Accelerator in Ingres database systems.} ) @inproceedings( vldb:HM94b, author = "Rune Hjelsvold and Roger Midtstraum", title = {{Modelling and Querying Video Data}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "686--694", note = "Industrial Case", abstract = { As video data is penetrating many information systems the need for database support for video data evolves. In this paper we present a generic data model that captures the structure of a video document and that provides a means for indexing a video stream. We also discuss query language features that can take advantage of the proposed model. We have identified basic operators that should be implemented in the query language to support content based queries. The paper also analyses how these operators can be used to provide video data queries. The model has been used as a basis for a television news archive prototype and some experimental results are presented.} ) @inproceedings( vldb:IW94, author = "Balakrishna R. Iyer and David Wilhite", title = {{Data Compression Support in Databases}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "695--704", note = "Industrial Case", abstract = { Computers running database management applications often manage large amounts of data. Typically, the price of the I/O subsystem is a considerable portion of the computing hardware. Fierce price competition demands every possible saving. Lossless data compression methods, when appropriately integrated with the dbms, yield significant savings.
Roughly speaking, a slight increase in CPU cycles is more than offset by savings in I/O subsystems. Various design issues arise in the use of data compression in the dbms -- from the choice of algorithm, statistics collection, hardware versus software based compression, location of the compression function in the overall computer system architecture, unit of compression, update in place, and the application of logic to compressed data. These are methodically examined and tradeoffs discussed in the context of choices made for IBM's DB2 dbms product.} ) @inproceedings( vldb:BD94, author = "P. A. Bernstein and U. Dayal", title = {{An Overview of Repository Technology}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "705--713", note = "Industrial Case", abstract = { A repository is a shared database of information about engineered artifacts. We define a repository manager to be a database application that supports checkout/checkin, version and configuration management, notification, context management, and workflow control. Since the main value of a repository is in the tools that use it, we discuss technical issues of integrating tools with repositories. We also discuss how to implement a repository manager by layering it on a DBMS, focusing especially on issues of programming interface, performance, distribution, and interoperability.} ) @inproceedings( vldb:CB94, author = "Munir Cochinwala and John Bradley", title = {{A Multidatabase System for Tracking and Retrieval of Financial Data}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "714--721", note = "Industrial Case", abstract = { We have built a multidatabase system to support a financial application that stores historical data used by traders to identify trends in the market.
The application has an update rate (append-only) of 500 inserts per second and also has sub-second response requirements for queries. A typical query requests between 100 and 1000 records. In this paper we define the characteristics of the application, the multidatabase system we used to support the applications and the extensions we made in the application to achieve the required functionality and performance.} ) @inproceedings( vldb:GRS94, author = "Nathan Goodman and Steve Rozen and Lincoln Stein", title = {{Building a Laboratory Information System Around a C++-Based Object-Oriented DBMS}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "722--729", note = "Industrial Case", abstract = { MapBase is a laboratory information system that has been supporting a high-throughput genome-mapping operation for the last three years. We chose to build MapBase around a C++-based OODBMS because, like CAD, CASE, and GIS applications, MapBase must be able to represent complex data and operations while providing fast response. However, MapBase also turned out to share many characteristics of classical information systems: it provides a central repository of carefully administered, mission-critical data used by clients written in many languages and running on a variety of hardware. In addition, our laboratory emphasizes continuous process re-engineering, with the result that MapBase's schema must evolve rapidly in order to reflect the current experimental workflow. We discuss how the technical characteristics of our OODBMS interacted with our requirements to form MapBase's current architecture, and we analyze its strengths and weaknesses.} ) @inproceedings( vldb:MC94, author = "Bruce E. Martin and R. G. G.
Cattell", title = {{Relating Distributed Objects}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "730--739", note = "Industrial Case", abstract = { Many relational and object-oriented database systems provide referential integrity and compound operations on related objects using relationship mechanisms. Distributed object systems are emerging to support applications that access objects across distributed, heterogeneous system boundaries. Because the fundamental assumptions of distributed, heterogeneous, federated computing systems differ from database systems, supporting object relationships in such an environment requires different approaches to the representation and manipulation of relationships than those traditionally used in database systems. This paper describes the Relationship Service for SunSoft's Distributed Object Environment (DOE). We describe the fundamental assumptions of distributed object systems and motivate our design in that context. Keywords: relationships, object-oriented systems, complex objects, distributed computing.} ) @inproceedings( vldb:YA94, author = "Tak W. Yan and Jurgen Annevelink", title = {{Integrating a Structured-Text Retrieval System with an Object-Oriented Database System}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "740--749", note = "Industrial Case", abstract = { We describe the integration of a structured-text retrieval system (TextMachine) into an object-oriented database system (OpenODB). We use the external function capability of the database system to encapsulate the text retrieval system as an external information source. Through query translation, we are able to provide a tight integration in the query language and processing; the user can access the text retrieval system using a standard database query language. 
The efficient and effective retrieval of structured text performed by the text retrieval system is combined with the rich modeling and general-purpose querying capabilities of the database system, resulting in an integrated system with querying power beyond those of the underlying systems. The integrated system also provides uniform access to textual data in the text retrieval system and structured data in the database system, thus allowing fusion of information.} ) @inproceedings( vldb:Spa94, author = "Stefano Spaccapietra", title = {{User Interfaces; Who Cares?}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "751", note = "Panel", abstract = { According to Michael Stonebraker's experiences, if you select a group of well-known experts in database research and ask them to identify the domains where meeting user's requirements is of primary importance and at the same time more significant advances are needed and more research should be promoted, user interfaces comes out as number one in the list. This has been the case over the last decade. Despite such a strong incentive, research on user interfaces seems to remain marginal within the database community. Part of this community considers that this is a domain for development, not for research. Often researchers feel that the specification of a user interface is not much more than assembling widgets in some order. Significantly, the VLDB 94 program offers no contribution on user interfaces. This panel will try to investigate the reasons for such a gap between discourse and practice, and look for remedies.} ) @inproceedings( vldb:ZI94, author = "Maria Zemankova and Yannis E.
Ioannidis", title = {{Scientific Databases -- State of the Art and Future Directions}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "752--753", note = "Panel", abstract = { The aim of this interdisciplinary panel is to examine scientific databases achievements (or lessons learned) and explore future challenges for the database community. In particular, some of the topics discussed include: \begin{itemize} \item state of the art in scientific databases research and practice. \item specific requirements in scientific data management (e.g., metadata, data calibration/validity, management of uncertainty in research hypotheses/results, designing and managing scientific experiments, and mobile/field databases). \item applicability and/or shortcomings of the current database technology in scientific or engineering domains. \item priority of information processing needs in scientific communities. (e.g., ease of access (e.g., Mosaic) vs. expressiveness of a query language (e.g., MSQL), integration of mass storage systems with database management systems, interoperability of database management systems with visualization and analysis tools, managing multi-media data, or allowing scientific collaboration). \item specific research or development issues that are very challenging for database researchers and/or can have extensive impact in conduct of research. \item strategies for forming successful interdisciplinary teams. \end{itemize}} ) @inproceedings( vldb:Ber94, author = "Philip A. Bernstein", title = {{PC Database Systems -- Present and Future}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "754", note = "Panel", abstract = { PC DBMSs are growing up. 
No longer just single-user B-tree access methods, some products now have remote DB access, query optimization, and multi-user capability. We can expect much more from them soon. What will future products look like? What new technical challenges arise? How can DB researchers contribute?} ) @inproceedings( vldb:LSSZ94, author = "Alon Y. Levy and Avi Silberschatz and Divesh Srivastava and Maria Zemankova", title = {{Challenges for Global Information Systems}}, booktitle = "Proceedings of the Twentieth International Conference on Very Large Databases", year = 1994, address = "Santiago, Chile", pages = "755", note = "Panel", abstract = { Currently, the Internet provides access to a very large number and wide variety of information sources (e.g., textual databases, sites containing technical reports, directory listings), and systems to access these sources (e.g., World Wide Web, Gopher, WAIS). The challenge is to provide easy, efficient, robust and secure access to this information and other kinds (e.g., relational and object oriented databases). The aim of this panel is to explore whether there are any new technical problems, relevant to the Database field, that need to be solved in order to realize such global information systems. In particular, we debate whether existing techniques from database systems (e.g., multidatabases and distributed databases) can be applied or straightforwardly extended to global information systems. Furthermore, we attempt to establish {\em realistic} goals for database technologies in global information systems.} )