From 4d3e7460b8b000dcbf2b4a1d6a8cd641690775cb Mon Sep 17 00:00:00 2001 From: Chris Deptula Date: Tue, 5 Mar 2019 14:40:48 -0700 Subject: [PATCH 1/3] [MONDRIAN-2655] Add AggregateScanSchema and AggregateScanCatalog connection properties to limit the catalogs and schemas that are scanned for tables when establishing the database connection --- .../rolap/aggmatcher/AggSchemaScanTest.java | 143 ++++ mondrian/src/it/java/mondrian/test/Main.java | 3 +- .../java/mondrian/rolap/RolapConnection.java | 6 +- .../rolap/RolapConnectionProperties.java | 27 +- .../main/java/mondrian/rolap/RolapSchema.java | 24 +- .../mondrian/rolap/aggmatcher/AggGen.java | 7 +- .../rolap/aggmatcher/AggTableManager.java | 26 +- .../mondrian/rolap/aggmatcher/JdbcSchema.java | 74 +- .../site/resources/doc/aggregate_tables.html | 652 +++++++++--------- .../src/site/resources/doc/configuration.html | 80 ++- 10 files changed, 658 insertions(+), 384 deletions(-) create mode 100644 mondrian/src/it/java/mondrian/rolap/aggmatcher/AggSchemaScanTest.java diff --git a/mondrian/src/it/java/mondrian/rolap/aggmatcher/AggSchemaScanTest.java b/mondrian/src/it/java/mondrian/rolap/aggmatcher/AggSchemaScanTest.java new file mode 100644 index 0000000000..d43ed1bd48 --- /dev/null +++ b/mondrian/src/it/java/mondrian/rolap/aggmatcher/AggSchemaScanTest.java @@ -0,0 +1,143 @@ +/* + * This software is subject to the terms of the Eclipse Public License v1.0 + * Agreement, available at the following URL: + * http://www.eclipse.org/legal/epl-v10.html. + * You must accept the terms of that agreement to use this software. + * + * Copyright (c) 2002-2019 Hitachi Vantara.. All rights reserved. 
+ */ + +package mondrian.rolap.aggmatcher; + +import junit.framework.Assert; +import mondrian.olap.Util; +import mondrian.rolap.RolapConnection; +import mondrian.rolap.RolapConnectionProperties; +import mondrian.test.FoodMartTestCase; + +import javax.sql.DataSource; +import java.sql.Connection; +import java.sql.DatabaseMetaData; +import java.sql.ResultSet; +import java.sql.SQLException; + +/** + * Test if AggSchemaScan and AggCatalogScan properties are used in JdbcSchema loadTablesOfType + * + */ +public class AggSchemaScanTest extends FoodMartTestCase { + + public AggSchemaScanTest( String name ) { + super(name); + } + + + public void testAggScanPropertiesEmptySchema() throws Exception { + + final RolapConnection rolapConn = (RolapConnection) getConnection(); + final DataSource dataSource = rolapConn.getDataSource(); + Connection sqlConnection = null; + try { + sqlConnection = dataSource.getConnection(); + Util.PropertyList propertyList = new Util.PropertyList(); + propertyList.put( RolapConnectionProperties.AggregateScanCatalog.name(), "bogus" ); + propertyList.put( RolapConnectionProperties.AggregateScanSchema.name(), "bogus" ); + JdbcSchema jdbcSchema = JdbcSchema.makeDB(dataSource); + jdbcSchema.resetAllTablesLoaded(); + jdbcSchema.getTablesMap().clear(); + + jdbcSchema.loadTables( propertyList ); + Assert.assertEquals( 0, jdbcSchema.getTablesMap().size() ); + } finally { + if (sqlConnection != null) { + try { + sqlConnection.close(); + } catch ( SQLException e) { + // ignore + } + } + } + } + + + public void testAggScanPropertiesPopulatedSchema() throws Exception { + + final RolapConnection rolapConn = (RolapConnection) getConnection(); + final DataSource dataSource = rolapConn.getDataSource(); + Connection sqlConnection = null; + try { + sqlConnection = dataSource.getConnection(); + DatabaseMetaData dbmeta = sqlConnection.getMetaData(); + if ( !dbmeta.supportsSchemasInTableDefinitions() && !dbmeta.supportsCatalogsInTableDefinitions() ) { + 
System.out.println( "Database does not support schema or catalog in table definitions. Cannot run test." ); + return; + } + Util.PropertyList propertyList = new Util.PropertyList(); + boolean foundSchema = false; + // Different databases treat catalogs and schemas differently. Figure out whether foodmart is a schema or catalog in this database + try { + String schema = sqlConnection.getSchema(); + String catalog = sqlConnection.getCatalog(); + if ( schema != null || catalog != null ) { + foundSchema = true; + propertyList.put( RolapConnectionProperties.AggregateScanCatalog.name(), catalog ); + propertyList.put( RolapConnectionProperties.AggregateScanSchema.name(), schema ); + } + } catch ( AbstractMethodError | Exception ex ) { + // Catch if the JDBC client throws an exception. Do nothing. + } + + // Some databases like Oracle do not implement getSchema and getCatalog with the connection, so try the dbmeta instead + if ( !foundSchema && dbmeta.supportsSchemasInTableDefinitions() ) { + try ( ResultSet resultSet = dbmeta.getSchemas() ) { + if ( resultSet.getMetaData().getColumnCount() == 2 ) { + while ( resultSet.next() ) { + if ( resultSet.getString( 1 ).equalsIgnoreCase( "foodmart" ) ) { + propertyList.put( RolapConnectionProperties.AggregateScanSchema.name(), resultSet.getString( 1 ) ); + propertyList.put( RolapConnectionProperties.AggregateScanCatalog.name(), resultSet.getString( 2 ) ); + foundSchema = true; + break; + } + } + } + + } + } + + if (dbmeta.supportsCatalogsInTableDefinitions() && !foundSchema) { + try ( ResultSet resultSet = dbmeta.getCatalogs() ) { + if ( resultSet.getMetaData().getColumnCount() == 1 ) { + while ( resultSet.next() ) { + if ( resultSet.getString( 1 ).equalsIgnoreCase( "foodmart" ) ) { + propertyList.put( RolapConnectionProperties.AggregateScanCatalog.name(), resultSet.getString( 1 ) ); + foundSchema = true; + break; + } + } + } + } + } + + if ( !foundSchema ) { + System.out.println( "Cannot find foodmart schema or catalog in 
database. Cannot run test." ); + return; + } + JdbcSchema jdbcSchema = JdbcSchema.makeDB(dataSource); + // Have to clear the table list because creating the connection loads this + jdbcSchema.resetAllTablesLoaded(); + jdbcSchema.getTablesMap().clear(); + + jdbcSchema.loadTables( propertyList ); + //The foodmart schema has 37 tables. + Assert.assertEquals( 37, jdbcSchema.getTablesMap().size() ); + } finally { + if (sqlConnection != null) { + try { + sqlConnection.close(); + } catch ( SQLException e) { + // ignore + } + } + } + } +} diff --git a/mondrian/src/it/java/mondrian/test/Main.java b/mondrian/src/it/java/mondrian/test/Main.java index 85758a776a..7cb12d0ac8 100644 --- a/mondrian/src/it/java/mondrian/test/Main.java +++ b/mondrian/src/it/java/mondrian/test/Main.java @@ -5,7 +5,7 @@ // You must accept the terms of that agreement to use this software. // // Copyright (C) 1998-2005 Julian Hyde -// Copyright (C) 2005-2018 Hitachi Vantara and others +// Copyright (C) 2005-2019 Hitachi Vantara and others // All Rights Reserved. // // jhyde, 21 January, 1999 @@ -361,6 +361,7 @@ public static Test suite() throws Exception { addTest(suite, RolapCubeHierarchyTest.class); addTest(suite, RolapCubeDimensionTest.class); addTest(suite, RolapUtilTest.class); + addTest(suite, AggSchemaScanTest.class); // Must be the last test. addTest(suite, TerminatorTest.class); diff --git a/mondrian/src/main/java/mondrian/rolap/RolapConnection.java b/mondrian/src/main/java/mondrian/rolap/RolapConnection.java index a755795e0c..627ff95f06 100644 --- a/mondrian/src/main/java/mondrian/rolap/RolapConnection.java +++ b/mondrian/src/main/java/mondrian/rolap/RolapConnection.java @@ -5,7 +5,7 @@ // You must accept the terms of that agreement to use this software. // // Copyright (C) 2001-2005 Julian Hyde -// Copyright (C) 2005-2017 Hitachi Vantara and others +// Copyright (C) 2005-2019 Hitachi Vantara and others // All Rights Reserved. 
*/ package mondrian.rolap; @@ -386,7 +386,9 @@ static DataSource createDataSource( jdbcProperties); } String connLc = jdbcConnectString.toLowerCase(); - if ( ( connLc.indexOf("mysql") > -1 ) || ( connLc.indexOf("mariadb") > -1 ) ) { + if ((connLc.indexOf("mysql") > -1) + || (connLc.indexOf("mariadb") > -1)) + { // mysql driver needs this autoReconnect parameter jdbcProperties.setProperty("autoReconnect", "true"); } diff --git a/mondrian/src/main/java/mondrian/rolap/RolapConnectionProperties.java b/mondrian/src/main/java/mondrian/rolap/RolapConnectionProperties.java index 56d8867149..7aceb5bf8f 100644 --- a/mondrian/src/main/java/mondrian/rolap/RolapConnectionProperties.java +++ b/mondrian/src/main/java/mondrian/rolap/RolapConnectionProperties.java @@ -5,10 +5,9 @@ // You must accept the terms of that agreement to use this software. // // Copyright (C) 2003-2005 Julian Hyde -// Copyright (C) 2005-2017 Hitachi Vantara +// Copyright (C) 2005-2019 Hitachi Vantara // All Rights Reserved. */ - package mondrian.rolap; import java.lang.ref.SoftReference; @@ -203,7 +202,29 @@ public enum RolapConnectionProperties { * *

Defaults to "-1s". */ - PinSchemaTimeout; + PinSchemaTimeout, + + /** + * The "AggregateScanSchema" property is the name of the database schema + * to scan when looking for aggregate tables. If defined, Mondrian will + * only look for aggregate tables within this schema. This means that + * all aggregate tables, including explicitly defined tables must be in + * this schema. If not defined, Mondrian will scan every schema that + * the database connection has access to when looking for aggregate + * tables. + */ + AggregateScanSchema, + + /** + * The "AggregateScanCatalog" property is the name of the database + * catalog to scan when looking for aggregate tables. If defined, + * Mondrian will only look for aggregate tables within this catalog. + * This means that all aggregate tables, including explicitly defined + * tables must be in this catalog. If not defined, Mondrian will + * scan every catalog the database connection has access to when + * looking for aggregate tables. + */ + AggregateScanCatalog; /** * Any property beginning with this value will be added to the diff --git a/mondrian/src/main/java/mondrian/rolap/RolapSchema.java b/mondrian/src/main/java/mondrian/rolap/RolapSchema.java index 5dc534cd92..c4364591aa 100644 --- a/mondrian/src/main/java/mondrian/rolap/RolapSchema.java +++ b/mondrian/src/main/java/mondrian/rolap/RolapSchema.java @@ -5,12 +5,13 @@ // You must accept the terms of that agreement to use this software. // // Copyright (C) 2001-2005 Julian Hyde -// Copyright (C) 2005-2017 Hitachi Vantara and others +// Copyright (C) 2005-2019 Hitachi Vantara and others // All Rights Reserved. 
*/ package mondrian.rolap; import mondrian.olap.*; +import mondrian.olap.Util.PropertyList; import mondrian.olap.fun.*; import mondrian.olap.type.*; import mondrian.resource.MondrianResource; @@ -211,7 +212,7 @@ private RolapSchema( DataSource dataSource) { this(key, connectInfo, dataSource, md5Bytes, md5Bytes != null); - load(catalogUrl, catalogStr); + load(catalogUrl, catalogStr, connectInfo); assert this.md5Bytes != null; } @@ -293,14 +294,29 @@ protected Logger getLogger() { return LOGGER; } + /** + * @deprecated API changed to also pass Mondrian connection properties + * @param catalogUrl URL of catalog + * @param catalogStr Text of catalog, or null + */ + @Deprecated + protected void load(String catalogUrl, String catalogStr) { + load(catalogUrl, catalogStr, new PropertyList()); + } + /** * Method called by all constructors to load the catalog into DOM and build * application mdx and sql objects. * * @param catalogUrl URL of catalog * @param catalogStr Text of catalog, or null + * @param connectInfo Mondrian connection properties */ - protected void load(String catalogUrl, String catalogStr) { + protected void load( + String catalogUrl, + String catalogStr, + PropertyList connectInfo) + { try { final Parser xmlParser = XOMUtil.createDefaultParser(); @@ -370,7 +386,7 @@ protected void load(String catalogUrl, String catalogStr) { throw Util.newError(e, "while parsing catalog " + catalogUrl); } - aggTableManager.initialize(); + aggTableManager.initialize(connectInfo); setSchemaLoadDate(); } diff --git a/mondrian/src/main/java/mondrian/rolap/aggmatcher/AggGen.java b/mondrian/src/main/java/mondrian/rolap/aggmatcher/AggGen.java index 4b1103d449..93e2ed7644 100644 --- a/mondrian/src/main/java/mondrian/rolap/aggmatcher/AggGen.java +++ b/mondrian/src/main/java/mondrian/rolap/aggmatcher/AggGen.java @@ -5,10 +5,9 @@ // You must accept the terms of that agreement to use this software. 
// // Copyright (C) 2005-2005 Julian Hyde -// Copyright (C) 2005-2017 Hitachi Vantara and others +// Copyright (C) 2005-2019 Hitachi Vantara and others // All Rights Reserved. */ - package mondrian.rolap.aggmatcher; import mondrian.olap.MondrianDef; @@ -157,7 +156,7 @@ protected void addForeignKeyToNotLostColumnUsages( private void init() { JdbcSchema db = JdbcSchema.makeDB(star.getDataSource()); try { - db.load(); + db.load(new Util.PropertyList()); } catch (SQLException ex) { getLogger().error(ex); return; @@ -467,7 +466,7 @@ private boolean addCollapsedColumn( return false; } - //CG guarantee the columns has been loaded before looking up them + // CG guarantee the columns has been loaded before looking up them try { jt.load(); } catch (SQLException sqle) { diff --git a/mondrian/src/main/java/mondrian/rolap/aggmatcher/AggTableManager.java b/mondrian/src/main/java/mondrian/rolap/aggmatcher/AggTableManager.java index 5cc31625c9..e8dd606d0d 100644 --- a/mondrian/src/main/java/mondrian/rolap/aggmatcher/AggTableManager.java +++ b/mondrian/src/main/java/mondrian/rolap/aggmatcher/AggTableManager.java @@ -5,7 +5,7 @@ // You must accept the terms of that agreement to use this software. // // Copyright (C) 2005-2005 Julian Hyde -// Copyright (C) 2005-2017 Hitachi Vantara and others +// Copyright (C) 2005-2019 Hitachi Vantara and others // All Rights Reserved. */ package mondrian.rolap.aggmatcher; @@ -14,6 +14,7 @@ import mondrian.olap.MondrianException; import mondrian.olap.MondrianProperties; import mondrian.olap.Util; +import mondrian.olap.Util.PropertyList; import mondrian.recorder.ListRecorder; import mondrian.recorder.MessageRecorder; import mondrian.recorder.RecorderException; @@ -38,7 +39,7 @@ *

  • A {@link mondrian.rolap.RolapSchema} creates an {@link AggTableManager}, * and stores it in a member variable to ensure that it is not * garbage-collected. - *
  • The {@link mondrian.rolap.RolapSchema} calls {@link #initialize()}, + *
  • The {@link mondrian.rolap.RolapSchema} calls {@link #initialize(PropertyList)}, * which scans the JDBC catalog and identifies aggregate tables. *
  • For each aggregate table, it creates an {@link AggStar} and calls * {@link RolapStar#addAggStar(AggStar)}. @@ -79,15 +80,25 @@ public Logger getLogger() { return LOGGER; } + /** + * @deprecated API updated to pass the Mondrian connection properties. + * See {@link #initialize(PropertyList)} + */ + @Deprecated + public void initialize() { + initialize(new Util.PropertyList()); + } + /** * Initializes this object, loading all aggregate tables and associating * them with {@link RolapStar}s. * This method should only be called once. + * @param connectInfo The Mondrian connection properties */ - public void initialize() { + public void initialize(PropertyList connectInfo) { if (MondrianProperties.instance().UseAggregates.get()) { try { - loadRolapStarAggregates(); + loadRolapStarAggregates(connectInfo); } catch (SQLException ex) { throw mres.AggLoadingError.ex(ex); } @@ -166,7 +177,10 @@ private String getFactTableName(RolapStar star) { * * @throws SQLException */ - private void loadRolapStarAggregates() throws SQLException { + private void loadRolapStarAggregates( + PropertyList connectInfo) + throws SQLException + { ListRecorder msgRecorder = new ListRecorder(); try { DefaultRules rules = DefaultRules.getInstance(); @@ -181,7 +195,7 @@ private void loadRolapStarAggregates() throws SQLException { db.flushUsages(); // loads tables, not their columns - db.load(); + db.load(connectInfo); loop: for (RolapStar star : getStars()) { diff --git a/mondrian/src/main/java/mondrian/rolap/aggmatcher/JdbcSchema.java b/mondrian/src/main/java/mondrian/rolap/aggmatcher/JdbcSchema.java index b937dc0b46..7bd847d79f 100644 --- a/mondrian/src/main/java/mondrian/rolap/aggmatcher/JdbcSchema.java +++ b/mondrian/src/main/java/mondrian/rolap/aggmatcher/JdbcSchema.java @@ -5,15 +5,18 @@ // You must accept the terms of that agreement to use this software. 
// // Copyright (C) 2005-2005 Julian Hyde -// Copyright (C) 2005-2018 Hitachi Vantara and others +// Copyright (C) 2005-2019 Hitachi Vantara and others // All Rights Reserved. */ package mondrian.rolap.aggmatcher; import mondrian.olap.MondrianDef; import mondrian.olap.MondrianProperties; +import mondrian.olap.Util; +import mondrian.olap.Util.PropertyList; import mondrian.resource.MondrianResource; import mondrian.rolap.RolapAggregator; +import mondrian.rolap.RolapConnectionProperties; import mondrian.rolap.RolapLevel; import mondrian.rolap.RolapStar; import mondrian.spi.Dialect; @@ -1130,17 +1133,26 @@ public Map getColumnMap() { private final SortedMap tables = new TreeMap(); - JdbcSchema(final DataSource dataSource) { + public JdbcSchema(final DataSource dataSource) { this.dataSource = dataSource; } + /** + * @deprecated API updated to pass Mondrian connection properties + * See: {@link #load(PropertyList)} + */ + @Deprecated + public void load() throws SQLException { + load(new Util.PropertyList()); + } + /** * This forces the tables to be loaded. - * + * @param connectInfo Mondrian connection properties * @throws SQLException */ - public void load() throws SQLException { - loadTables(); + public void load(PropertyList connectInfo) throws SQLException { + loadTables(connectInfo); } protected synchronized void clear() { @@ -1270,10 +1282,10 @@ private static int getSafeInt(ResultSet rs, int columnIndex) /** * Gets all of the tables (and views) in the database. * If called a second time, this method is a no-op. 
- * + * @param connectInfo The Mondrian connection properties * @throws SQLException */ - private void loadTables() throws SQLException { + protected void loadTables(PropertyList connectInfo) throws SQLException { if (allTablesLoaded) { return; } @@ -1281,15 +1293,33 @@ private void loadTables() throws SQLException { try { conn = getDataSource().getConnection(); final DatabaseMetaData databaseMetaData = conn.getMetaData(); + + final String scanSchemaProp = + connectInfo.get( + RolapConnectionProperties.AggregateScanSchema.name(), + getSchemaName()); + final String scanCatalogProp = + connectInfo.get( + RolapConnectionProperties.AggregateScanCatalog.name(), + getCatalogName()); + String[] tableTypes = { "TABLE", "VIEW" }; if (databaseMetaData.getDatabaseProductName().toUpperCase().indexOf( "VERTICA") >= 0) { for (String tableType : tableTypes) { - loadTablesOfType(databaseMetaData, new String[]{tableType}); + loadTablesOfType( + databaseMetaData, + new String[]{tableType}, + scanSchemaProp, + scanCatalogProp); } } else { - loadTablesOfType(databaseMetaData, tableTypes); + loadTablesOfType( + databaseMetaData, + tableTypes, + scanSchemaProp, + scanCatalogProp); } allTablesLoaded = true; } finally { @@ -1302,20 +1332,33 @@ private void loadTables() throws SQLException { /** * Loads definition of tables of a given set of table types ("TABLE", "VIEW" * etc.) + * @param databaseMetaData The databaseMetaData for the database connection + * to search for the list of tables + * @param tableTypes The table types to load. ("TABLE", "VIEW", etc.) + * @param scanCatalogProp The name of the database catalog to load the + * list of tables from. Null will load from every + * catalog. + * @param scanSchemaProp The name of the database schema to load the + * list of tables from. Null will load from every + * schema. 
*/ private void loadTablesOfType( DatabaseMetaData databaseMetaData, - String[] tableTypes) + String[] tableTypes, + String scanSchemaProp, + String scanCatalogProp) throws SQLException { - final String schema = getSchemaName(); - final String catalog = getCatalogName(); final String tableName = "%"; ResultSet rs = null; try { + getLogger().debug( + "Getting list of tables from catalog " + + scanCatalogProp + " schema " + + scanSchemaProp + " table " + tableName); rs = databaseMetaData.getTables( - catalog, - schema, + scanCatalogProp, + scanSchemaProp, tableName, tableTypes); if (rs == null) { @@ -1343,11 +1386,12 @@ protected void addTable(final ResultSet rs) throws SQLException { String name = rs.getString(3); String tableType = rs.getString(4); Table table = new Table(name, tableType); + getLogger().debug("Adding table " + name); tables.put(table.getName(), table); } - private SortedMap getTablesMap() { + protected SortedMap getTablesMap() { return tables; } diff --git a/mondrian/src/site/resources/doc/aggregate_tables.html b/mondrian/src/site/resources/doc/aggregate_tables.html index 347d588320..44e39159e4 100644 --- a/mondrian/src/site/resources/doc/aggregate_tables.html +++ b/mondrian/src/site/resources/doc/aggregate_tables.html @@ -19,7 +19,7 @@
    Aggregate Tables
    - @@ -58,23 +58,27 @@

    Contents

  • Online/offline control
  • Properties that affect aggregates
  • +
      +
    1. Mondrian properties
    2. +
    3. Connect string properties
    4. +
  • References
  • -

    1. Introduction 

    -

    Unlike many OLAP servers, Mondrian does not store data on disk: it just works on -the data in the RDBMS, and once it has read a piece of data once, it stores that -data in its cache. This greatly simplifies the process of installing Mondrian, -but it puts limits on Mondrian's performance when Mondrian is applied to a huge +

    Unlike many OLAP servers, Mondrian does not store data on disk: it just works on +the data in the RDBMS, and once it has read a piece of data once, it stores that +data in its cache. This greatly simplifies the process of installing Mondrian, +but it puts limits on Mondrian's performance when Mondrian is applied to a huge dataset.

    -

    Consider what happens when the CEO runs her Sales Report first thing on a Monday -morning. This report contains a single number: the total sales of all products, -in all regions, this year. In order to get this number, Mondrian generates a +

    Consider what happens when the CEO runs her Sales Report first thing on a Monday +morning. This report contains a single number: the total sales of all products, +in all regions, this year. In order to get this number, Mondrian generates a query something like this:

    @@ -85,15 +89,15 @@

    1. Introduction 

    AND time.year = 2005
    -

    and sends it to the DBMS. The DBMS takes several minutes to execute it: which is -understandable because the DBMS has to read all of this year's records in the -fact table (a few million sales, say) and aggregate them into a single total. -Clearly, what is needed in this case, and in others like it, is a pre-computed +

    and sends it to the DBMS. The DBMS takes several minutes to execute it: which is +understandable because the DBMS has to read all of this year's records in the +fact table (a few million sales, say) and aggregate them into a single total. +Clearly, what is needed in this case, and in others like it, is a pre-computed summary of the data: an aggregate table.

    -

    An aggregate table coexists with the base fact table, +

    An aggregate table coexists with the base fact table, and contains pre-aggregated measures built from the -fact table. It is registered in Mondrian's schema, so that Mondrian can choose +fact table. It is registered in Mondrian's schema, so that Mondrian can choose whether to use the aggregate table rather than the fact table, if it is applicable for a particular query.

    @@ -101,7 +105,7 @@

    1. Introduction 

    There is extensive research, both empirical and theoretical, available on the web concerning different ways to structure aggregate tables and we will not attempt to duplicate any of it here.

    - @@ -112,9 +116,9 @@

    2. What are aggregate tables? 

    -

    The star schema has a single fact table Sales, two measure -columns (units and dollars) and four dimension tables -(Product, Mfr, Customer, Time, +

    The star schema has a single fact table Sales, two measure +columns (units and dollars) and four dimension tables +(Product, Mfr, Customer, Time, and Customer).

    On top of this star schema, we create the following multidimensional model:

    @@ -130,19 +134,19 @@

    2. What are aggregate tables? 

  • Dimension [Customer] has levels [All Customers], [State], [City], [Custid]
  • -
  • Dimension [Payment Method] has levels [All Payment +
  • Dimension [Payment Method] has levels [All Payment Methods], [Payment Method]
  • -

    Most of the dimensions have a corresponding dimension table, but there are -two exceptions. The [Product] dimension is a snowflake -dimension, which means that it is spread across more than one table (in -this case Product and Mfr). The [Payment Method] dimension +

    Most of the dimensions have a corresponding dimension table, but there are +two exceptions. The [Product] dimension is a snowflake +dimension, which means that it is spread across more than one table (in +this case Product and Mfr). The [Payment Method] dimension is a degenerate dimension; its sole attribute is the -payment column in the fact table, and so it does not need a dimension +payment column in the fact table, and so it does not need a dimension table.

    - @@ -156,17 +160,17 @@

    2.1 A simple aggregate table See how the original star schema columns have been combined into the table:

    @@ -176,31 +180,31 @@

    2.1 A simple aggregate table <Cube name="Sales">
      <Table name="sales">
    -     <AggName +     <AggName name="agg_1">
          <AggFactCount column="row count"/>
    -       <AggMeasure name="[Measures].[Unit +       <AggMeasure name="[Measures].[Unit Sales]" column="sum units"/>
    -       <AggMeasure +       <AggMeasure name="[Measures].[Min Units]" column="min units"/>
    -       <AggMeasure name="[Measures].[Max +       <AggMeasure name="[Measures].[Max Units]" column="max units"/>
    -      <AggMeasure name="[Measures].[Dollar +      <AggMeasure name="[Measures].[Dollar Sales]" column="sum dollars"/>
    -       <AggLevel name="[Time].[Year]" +       <AggLevel name="[Time].[Year]" column="year"/>
    -       <AggLevel name="[Time].[Quarter]" +       <AggLevel name="[Time].[Quarter]" column="quarter"/>
    -       <AggLevel name="[Product].[Mfrid]" +       <AggLevel name="[Product].[Mfrid]" column="mfrid"/>
    -       <AggLevel name="[Product].[Brand]" +       <AggLevel name="[Product].[Brand]" column="brand"/>
    -       <AggLevel name="[Product].[Prodid]" +       <AggLevel name="[Product].[Prodid]" column="prodid"/>
        </AggName>
    @@ -210,7 +214,7 @@

    2.1 A simple aggregate table Cube> - @@ -227,60 +231,60 @@

    2.2 Another aggregate table 

    <Cube name="Sales">
      <Table name="sales">
    -     <AggName +     <AggName name="agg_1" ... />
    -     <AggName +     <AggName name="agg_2">
          <AggFactCount column="row count"/>
    -       <AggForeignKey factColumn="prodid" +       <AggForeignKey factColumn="prodid" aggColumn="prodid"/>
    -       <AggMeasure name="[Measures].[Unit +       <AggMeasure name="[Measures].[Unit Sales]" column="sum units"/>
    -       <AggMeasure +       <AggMeasure name="[Measures].[Min Units]" column="min units"/>
    -       <AggMeasure name="[Measures].[Max +       <AggMeasure name="[Measures].[Max Units]" column="max units"/>
    -      <AggMeasure name="[Measures].[Dollar +      <AggMeasure name="[Measures].[Dollar Sales]" column="sum dollars"/>
    -       <AggLevel name="[Time].[Year]" +       <AggLevel name="[Time].[Year]" column="year"/>
    -       <AggLevel name="[Time].[Quarter]" +       <AggLevel name="[Time].[Quarter]" column="quarter"/>
    -       <AggLevel name="[Time].[Month]" +       <AggLevel name="[Time].[Month]" column="month"/>
    -       <AggLevel name="[Payment - Method].[Payment Method]" +       <AggLevel name="[Payment + Method].[Payment Method]" column="payment"/>
    -       <AggLevel name="[Customer].[State]" +       <AggLevel name="[Customer].[State]" column="state"/>
        </AggName>
      </Table>

      <Dimension name="Product">
    -    <Hierarchy hasAll="true" +    <Hierarchy hasAll="true" primaryKey="prodid" primaryKeyTable="Product">
    -      <Join leftKey="mfrid" +      <Join leftKey="mfrid" rightKey="mfrid">
    -        <Table +        <Table name="Product"/>
    -        <Table +        <Table name="Mfr"/>
          </Join>
    -      <Level +      <Level name="Manufacturer" table="Mfr" column="mfrid"/>
    -      <Level +      <Level name="Brand" table="Product" column="brand"/>
    -      <Level +      <Level name="Name" table="Product" column="prodid"/>
        </Hierarchy>
    @@ -292,19 +296,19 @@

    2.2 Another aggregate table 

    Several dimensions have been collapsed: [Time] at the -[Quarter] level; [Customer] at the [State] -level; and [Payment Method] at the [Payment Method] +[Quarter] level; [Customer] at the [State] +level; and [Payment Method] at the [Payment Method] -level. But the [Product] dimension has been retained in its +level. But the [Product] dimension has been retained in its original snowflake form.

    -

    The <AggForeignKey> element is -used to  declare that the column prodid links to the dimension -table, but all other columns remain in the Product and Mfr +

    The <AggForeignKey> element is +used to  declare that the column prodid links to the dimension +table, but all other columns remain in the Product and Mfr dimension tables.

    - @@ -323,42 +327,42 @@

    2.3 Non Collapsed Aggregate Levels& <Cube name="Sales">
      <Table name="sales">
    -     <AggName +     <AggName name="agg_3">
          <AggFactCount column="cnt"/>
    -       <AggMeasure name="[Measures].[Unit +       <AggMeasure name="[Measures].[Unit Sales]" column="sls"/>
    -       <AggLevel name="[Time].[Year]" +       <AggLevel name="[Time].[Year]" column="yer"/>
    -       <AggLevel name="[Time].[Quarter]" +       <AggLevel name="[Time].[Quarter]" column="qtr"/>
    -       <AggLevel name="[Time].[Month]" +       <AggLevel name="[Time].[Month]" column="mth"/>
    -       <AggLevel name="[Channel.Network].[Brand]" +       <AggLevel name="[Channel.Network].[Brand]" column="brn" collapsed="false"/>
        </AggName>
    -     <AggName +     <AggName name="agg_3">
          <AggFactCount column="cnt"/>
    -       <AggMeasure name="[Measures].[Unit +       <AggMeasure name="[Measures].[Unit Sales]" column="sls"/>
    -       <AggLevel name="[Time].[Year]" +       <AggLevel name="[Time].[Year]" column="yer"/>
    -       <AggLevel name="[Time].[Quarter]" +       <AggLevel name="[Time].[Quarter]" column="qtr"/>
    -       <AggLevel name="[Time].[Month]" +       <AggLevel name="[Time].[Month]" column="mth"/>
    -       <AggLevel name="[Channel.Distributor].[Brand]" +       <AggLevel name="[Channel.Distributor].[Brand]" column="brn" collapsed="false"/>
        </AggName>
    @@ -369,72 +373,72 @@

    2.3 Non Collapsed Aggregate Levels&
      <
    Dimension name="Channel">
    -     <Hierarchy hasAll="true" +     <Hierarchy hasAll="true" name="Network" primaryKey="prod" primaryKeyTable="prod">
    -       <Join leftKey="brn" +       <Join leftKey="brn" rightKey="brn" rightAlias="brn_mfr">
    -         <Table +         <Table name="prod"/>
    -         <Join leftKey="brn" +         <Join leftKey="brn" rightKey="brn" rightAlias="brn_mfr">
    -           <Table +           <Table name="brn_mfr"/>
    -           <Join leftKey="mfr" +           <Join leftKey="mfr" rightKey="mfr">
    -             <Table +             <Table name="brn_mfr"/>
    -             <Table +             <Table name="mfr_net"/>
              </Join>
            </Join>
          </Join>
    -       <Level +       <Level name="Network" table="mrf_net" column="net"/>
    -       <Level +       <Level name="Manufacturer" table="mfr_brn" column="brn"/>
    -       <Level +       <Level name="Brand" table="brn_mfr" column="brn"/>
    -       <Level +       <Level name="Product" table="prd" column="brd"/>
        </Hierarchy>
    -     <Hierarchy hasAll="true" +     <Hierarchy hasAll="true" name="Distributor" primaryKey="prod" primaryKeyTable="prod">
    -       <Join leftKey="brn" +       <Join leftKey="brn" rightKey="brn" rightAlias="brn_mfr">
    -         <Table +         <Table name="prod"/>
    -         <Join leftKey="brn" +         <Join leftKey="brn" rightKey="brn" rightAlias="brn_mfr">
    -           <Table +           <Table name="brn_mfr"/>
    -           <Join leftKey="mfr" +           <Join leftKey="mfr" rightKey="mfr">
    -             <Table +             <Table name="brn_mfr"/>
    -             <Table +             <Table name="mfr_dist"/>
              </Join>
            </Join>
          </Join>
    -       <Level +       <Level name="Distributor" table="mrf_dist" column="dist"/>
    -       <Level +       <Level name="Manufacturer" table="mfr_brn" column="brn"/>
    -       <Level +       <Level name="Brand" table="brn_mfr" column="brn"/>
    -       <Level +       <Level name="Product" table="prd" column="brd"/>
        </Hierarchy>
    @@ -466,7 +470,7 @@

    2.3 Non Collapsed Aggregate Levels&

    As with regular AggLevel elements, it is not necessary to include the bottom levels of the hierarchies. In the example above, we have omitted the last level, [Product]

    - @@ -501,17 +505,17 @@

    3. Defining aggregate tables Mondrian supports two aggregation techniques which are called "lost" dimension and "collapsed" dimension. For the creation of any -given aggregate table these can +given aggregate table these can be applied independently to any number of different dimensions.

    A "lost" dimension is one which is completely missing from the aggregate -table. The measures that appear in the table have been aggregated +table. The measures that appear in the table have been aggregated across all values of the lost dimension. As an example, in a fact table with dimensions of time, location, and product and measure sales, for an aggregate table that did not have the location dimension that dimension would be "lost". Here, the sales measure would be the aggregation over all locations. An aggregate table where all of the dimensions -are lost is possible - it would have a single row with the measure +are lost is possible - it would have a single row with the measure aggregated over everything - sales for all time, all locations and all products.

    @@ -544,16 +548,16 @@

    3. Defining aggregate tables  -

    The second supported aggregation technique provides a finer level of +

    The second supported aggregation technique provides a finer level of control, the "collapsed" dimension technique. -Recall that the dimension key in the fact table refers (more or less) -to the +Recall that the dimension key in the fact table refers (more or less) +to the lowest level in the dimension hierarchy. For a collapsed dimension, the dimension key in the aggregate table is replaced with a set of dimension levels; the dimension key column is replaced with a set of columns; a fully denormalized summary table for that dimension. -As an example, if the time dimension with base fact table foreign key +As an example, if the time dimension with base fact table foreign key time_id had the levels: day, month, quarter and year, and in an aggregate it was collapsed to the month level, then the aggregate table would not have a time_id column but rather @@ -586,7 +590,7 @@

    3. Defining aggregate tables In the literature, there are other ways of creating aggregate tables but they are not supported by Mondrian at this time.

    - @@ -599,13 +603,13 @@

    4. Building aggregate tables 

    aggregate table definition, one can estimate that for a dimension with N levels, there are N+1 possible aggregate tables (N collapsed and 1 lost). Also, dimensions (with different dimension tables) can -be aggregated independently. -For the FoodMart Sales cube there are 1400 different possible aggregate +be aggregated independently. +For the FoodMart Sales cube there are 1400 different possible aggregate tables.

    Clearly, one does not want to create all possible aggregate tables. Which ones to create depends upon two considerations. The first -consideration is application dependent: +consideration is application dependent: the nature of the MDX queries that will be executed. If many of the queries deal with per month and per state questions, then an aggregate at those levels might be created. @@ -614,15 +618,15 @@

    4. Building aggregate tables 

    the lowest level to the next lowest generally gives greater bang for the buck than aggregating from the N to the N+1 (N>1) level. This is because 1) a first level aggregation can be used for all -queries at that level and above and 2) dimension fan-out tends to +queries at that level and above and 2) dimension fan-out tends to increase for the lower levels. Of course, your mileage may vary.

    -

    In a sense, picking which aggregate tables to build is analogous to +

    In a sense, picking which aggregate tables to build is analogous to picking which indexes to build on a table; it is application dependent and experience helps.

    -

    The hardest part about the actually creation and population of +

    The hardest part about the actual creation and population of aggregate tables is figuring out how to create the first couple; what the SQL looks like. After that they are pretty much all the same.

    @@ -710,16 +714,16 @@

    4. Building aggregate tables 

    do not recognize star joins will require indexes on both the fact table and the aggregate tables.

    -

    For our purposes here, the exact name of the aggregate table is not +

    For our purposes here, the exact name of the aggregate table is not important; the "agg_l_05_" preceding the base fact table's name sales_fact_1997. First, the aggregate table name must be different -from the base fact table name. Next, the aggregate table name ought to be +from the base fact table name. Next, the aggregate table name ought to be related to the base fact table name both for human eyeballing of what aggregate is associated with which fact table, but also, as described below, Mondrian employs a mechanism to automagically recognize which tables are aggregates of others.

    -

    The following example is a collapsed dimension aggregate table +

    The following example is a collapsed dimension aggregate table where the time dimension has been rolled up to the month level.

    @@ -789,7 +793,7 @@

    4. Building aggregate tables 

    When creating a collapsed dimension aggregate one might consider creating indexes for the columns imported from the dimension that was collapsed.

    -

    Below is another aggregate table. This one has two lost dimensions (store_id and +

    Below is another aggregate table. This one has two lost dimensions (store_id and promotion_id) as well as collapsed dimension on time to the quarter level. This shows how aggregate techniques can be mixed.

    @@ -838,9 +842,9 @@

    4. Building aggregate tables 

    In the above three examples, for the most part the column names -in the aggregate are the same column names that appear in the fact +in the aggregate are the same column names that appear in the fact table and dimension tables. These tables would all be -recognized by the Mondrian +recognized by the Mondrian default aggregate recognizer. It is possible to create an aggregate table and name the columns arbitrarily. @@ -906,7 +910,7 @@

    4. Building aggregate tables 

    This aggregate table has column names that are not identical to those -found in the base fact table and dimension table. It is still a +found in the base fact table and dimension table. It is still a valid aggregate but Mondrian has to be told how to map its columns into those of the base fact table.

    @@ -919,7 +923,7 @@

    4. Building aggregate tables 

    In this case, one might first build the aggregate with only the lost time dimension and then build the aggregate with both lost time and product dimensions from that first aggregate - it will be -faster (in some cases, much faster) +faster (in some cases, much faster) to populate the second aggregate from the first rather than from the base fact table.

    @@ -927,36 +931,36 @@

    4. Building aggregate tables 

    pay attention to the size of the numeric columns - what might be big enough in the base fact table might not be big enough in an aggregate.

    -

    5. How Mondrian recognizes Aggregate Tables 

    Mondrian has to know about the aggregate tables in order to use them. -You can either define an aggregate explicitly, or set up rules to recognize +You can either define an aggregate explicitly, or set up rules to recognize several aggregate tables at the same time.

    How Mondrian recognizes aggregate table names and columns pretty much dictates how one must name those table names and columns when creating them in the first place!

    -

    5.1 Rules 

    -

    Rules are templates, designed to work for all fact table names +

    Rules are templates, designed to work for all fact table names and their column -names. -These rules are templates of regular expressions +names. +These rules are templates of regular expressions that are instantiated with the names of a fact table and its columns. In order to describe the rule templates, a name that instantiates a rule is represented in the rule by having the name bracketed -by "${" and "}". As an example, -"abc_${name}_xyz" +by "${" and "}". As an example, +"abc_${name}_xyz" is a rule parameterized -by "name". When name is "john" the template becomes +by "name". When name is "john" the template becomes "abc_john_xyz".

    The regular expression engine used here and a definition of @@ -983,7 +987,7 @@

    5.1 Rules 

    agg_.+_${fact_table_name}
    - +

    which is parameterized with the fact table's name. (In addition, this rule is applied in "ignore case" mode.) This means that an aggregate table's name must start with @@ -993,8 +997,8 @@

    5.1 Rules 

    in a regular expression - it matches one or more characters.

    As an example of applying the aggregate table name rule, -let the fact table be called -sales_fact_1997, the +let the fact table be called +sales_fact_1997, the Sales cube's fact table from the FoodMart schema. Applying the @@ -1004,7 +1008,7 @@

    5.1 Rules 

    agg_.+_sales_fact_1997
    - +

    This will match the following table names:

    At this point, matches are looked for the level and measure columns. -Both of these matching rules are multi-part - has sub rules; +Both of these matching rules are multi-part - has sub rules; each rule has more than one possible regular expression that might match where a match on any one is a match.

    @@ -1079,7 +1083,7 @@

    5.1 Rules 

    There are three sub rules for matching level columns. Each is a template which is parameterized with 1) the fact table's cube's dimension hierarchy's name, "hierarchy_name", 2) the fact table's cube's dimension -hierarchy's level name, "level_name", 3) the dimension table's level +hierarchy's level name, "level_name", 3) the dimension table's level column name, "level_column_name", and 4) a usage prefix, "usage_prefix", which in most cases is null:

    @@ -1091,7 +1095,7 @@

    5.1 Rules 

  • ${level_column_name}
  • -

    The "usage_prefix" is the value of the +

    The "usage_prefix" is the value of the DimensionUsage's or private Dimension's @@ -1100,22 +1104,22 @@

    5.1 Rules 

    attribute. It can be the case that a "level_column_name", the name of a dimension's level column, is the same for more than one dimension. -During aggregate recognition for collapsed dimension aggregates +During aggregate recognition for collapsed dimension aggregates where the base fact table has two or more dimensions with common column names, the attempted recognition will fail unless in the -schema catalog the +schema catalog the usagePrefix attribute is used to disambiguate those column names. Of course, one must also remember to prefix the column in the aggregate table with the same prefix.

    -

    As an example of +

    As an example of usagePrefix, consider a fact table named -ORDERS which has two +ORDERS which has two DimensionUsages, one for the CUSTOMER dimension -and the other for the +and the other for the WHOLESALER dimension where each dimension has a level column named @@ -1131,25 +1135,25 @@

    5.1 Rules 

    table column was named WS_CUST_NM, then the recognizer could associate the -column with the +column with the WHOLESALER dimension.

    In the case of a private -Dimension, a +Dimension, a usagePrefix need only be used if there is a public, shared -Dimension that has the same name and has a +Dimension that has the same name and has a "level_column_name" that is also the same. -Without the +Without the usagePrefix there would be no way of disambiguating collapsed dimension aggregate tables.

    If any of these parameters have space characters, ' ', these are mapped to underscore characters, '_', and, similarly, dot characters, '.', are also mapped to underscores. -So, if the hierarchy_name is "Time", level_name is "Month" and -level_column_name is month_of_year, the possible aggregate table column +So, if the hierarchy_name is "Time", level_name is "Month" and +level_column_name is month_of_year, the possible aggregate table column names are: