From 5dd56c4f4c6ca8300eba62ca4001405d7bf9e12a Mon Sep 17 00:00:00 2001
From: Karan980
Date: Fri, 11 Dec 2020 00:06:00 +0530
Subject: [PATCH] [WIP] UT Improvement for load/compaction
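
Added the following test cases:

1. GeoTest: a spatial index column is rejected by the
   LOCAL_DICTIONARY_INCLUDE and LONG_STRING_COLUMNS options, both in
   CREATE TABLE and in ALTER TABLE SET TBLPROPERTIES.
2. TestCreateTableLike: CREATE TABLE LIKE a table with a spatial
   index copies the table properties.
3. CarbonDataSourceSuite: assert on the error message when a complex
   data type column is used as a bucket column.
4. TableBucketingTestCase: creating a bucket table on a column that
   is not present in the table fails with an AnalysisException.
5. ChangeDataTypeTestCases: data load after changing the datatype of
   a range column and of a bucket column, and minor compaction after
   changing the datatype of a bucket column, verifying the valid
   segment list.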
---
 .../org/apache/carbondata/geo/GeoTest.scala   | 24 +++++++++
 .../createTable/TestCreateTableLike.scala     | 23 ++++++++
 .../carbondata/CarbonDataSourceSuite.scala    |  3 +-
 .../bucketing/TableBucketingTestCase.scala    | 16 +++++-
 .../ChangeDataTypeTestCases.scala             | 54 +++++++++++++++++++
 5 files changed, 118 insertions(+), 2 deletions(-)

diff --git a/integration/spark/src/test/scala/org/apache/carbondata/geo/GeoTest.scala b/integration/spark/src/test/scala/org/apache/carbondata/geo/GeoTest.scala
index fa383f32b0c..6baa25a8f3e 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/geo/GeoTest.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/geo/GeoTest.scala
@@ -123,6 +123,18 @@ class GeoTest extends QueryTest with BeforeAndAfterAll with BeforeAndAfterEach {
     assert(exception.getMessage.contains(
       s"mygeohash is a spatial index column and is not allowed for " +
       s"the option(s): bucket_columns"))
+
+    exception = intercept[MalformedCarbonCommandException](
+      createTable(table1, " 'LOCAL_DICTIONARY_INCLUDE'='mygeohash', "))
+    assert(exception.getMessage.contains(
+      s"mygeohash is a spatial index column and is not allowed for " +
+      s"the option(s): local_dictionary_include"))
+
+    exception = intercept[MalformedCarbonCommandException](
+      createTable(table1, " 'LONG_STRING_COLUMNS'='mygeohash', "))
+    assert(exception.getMessage.contains(
+      s"mygeohash is a spatial index column and is not allowed for " +
+      s"the option(s): long_string_columns"))
   }
 
   test("test alter table with invalid table properties") {
@@ -145,6 +157,18 @@ class GeoTest extends QueryTest with BeforeAndAfterAll with BeforeAndAfterEach {
     assert(exception.getMessage.contains(
       s"mygeohash is a spatial index column and is not allowed for " +
       s"the option(s): column_meta_cache"))
+
+    exception = intercept[RuntimeException](
+      sql(s"ALTER TABLE $table1 SET TBLPROPERTIES('LOCAL_DICTIONARY_INCLUDE' = 'mygeohash')"))
+    assert(exception.getMessage.contains(
+      s"mygeohash is a spatial index column and is not allowed for " +
+      s"the option(s): local_dictionary_include"))
+
+    exception = intercept[RuntimeException](
+      sql(s"ALTER TABLE $table1 SET TBLPROPERTIES('LONG_STRING_COLUMNS' = 'mygeohash')"))
+    assert(exception.getMessage.contains(
+      s"mygeohash is a spatial index column and is not allowed for " +
+      s"the option(s): long_string_columns"))
   }
 
   test("test materialized view with spatial column") {
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateTableLike.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateTableLike.scala
index 8d2d9bfb2bf..56f94d75c3b 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateTableLike.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateTableLike.scala
@@ -204,4 +204,27 @@ class TestCreateTableLike extends QueryTest with BeforeAndAfterEach with BeforeAndAfterAll {
     checkTableProperties(TableIdentifier("sourceTable"), TableIdentifier("targetTable"))
   }
 
+  test("test create table like with geoTable") {
+    sql("drop table if exists geoTable")
+    sql(s"""
+           | CREATE TABLE geoTable(
+           | timevalue BIGINT,
+           | longitude LONG,
+           | latitude LONG) COMMENT "This is a GeoTable"
+           | STORED AS carbondata
+           | TBLPROPERTIES ('SPATIAL_INDEX'='mygeohash',
+           | 'SPATIAL_INDEX.mygeohash.type'='geohash',
+           | 'SPATIAL_INDEX.mygeohash.sourcecolumns'='longitude, latitude',
+           | 'SPATIAL_INDEX.mygeohash.originLatitude'='39.832277',
+           | 'SPATIAL_INDEX.mygeohash.gridSize'='50',
+           | 'SPATIAL_INDEX.mygeohash.minLongitude'='115.811865',
+           | 'SPATIAL_INDEX.mygeohash.maxLongitude'='116.782233',
+           | 'SPATIAL_INDEX.mygeohash.minLatitude'='39.832277',
+           | 'SPATIAL_INDEX.mygeohash.maxLatitude'='40.225281',
+           | 'SPATIAL_INDEX.mygeohash.conversionRatio'='1000000')
+       """.stripMargin)
+    sql("create table targetTable like geoTable")
+    checkTableProperties(TableIdentifier("geoTable"), TableIdentifier("targetTable"))
+  }
+
 }
diff --git a/integration/spark/src/test/scala/org/apache/spark/carbondata/CarbonDataSourceSuite.scala b/integration/spark/src/test/scala/org/apache/spark/carbondata/CarbonDataSourceSuite.scala
index b9e94cfca6c..51a58dd6fa1 100644
--- a/integration/spark/src/test/scala/org/apache/spark/carbondata/CarbonDataSourceSuite.scala
+++ b/integration/spark/src/test/scala/org/apache/spark/carbondata/CarbonDataSourceSuite.scala
@@ -238,11 +238,12 @@ class CarbonDataSourceSuite extends QueryTest with BeforeAndAfterAll {
 
   test("test to create bucket columns with complex data type field") {
     sql("drop table if exists create_source")
-    intercept[Exception] {
+    val ex = intercept[Exception] {
       sql("create table create_source(intField int, stringField string, " +
         "complexField array<string>) USING carbondata " +
         "OPTIONS('bucket_number'='1', 'bucket_columns'='complexField')")
     }
+    assert(ex.getMessage.contains("Bucket field should not be complex column or decimal data type"))
   }
 
   test("test check results of table with complex data type and bucketing") {
diff --git a/integration/spark/src/test/scala/org/apache/spark/carbondata/bucketing/TableBucketingTestCase.scala b/integration/spark/src/test/scala/org/apache/spark/carbondata/bucketing/TableBucketingTestCase.scala
index edd7c9e47ff..70af572057e 100644
--- a/integration/spark/src/test/scala/org/apache/spark/carbondata/bucketing/TableBucketingTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/spark/carbondata/bucketing/TableBucketingTestCase.scala
@@ -20,7 +20,7 @@ package org.apache.spark.carbondata.bucketing
 import java.sql.Date
 import java.text.SimpleDateFormat
 
-import org.apache.spark.sql.{CarbonEnv, Row}
+import org.apache.spark.sql.{AnalysisException, CarbonEnv, Row}
 import org.apache.spark.sql.execution.WholeStageCodegenExec
 import org.apache.spark.sql.execution.exchange.Exchange
 import org.apache.spark.sql.execution.joins.SortMergeJoinExec
@@ -139,6 +139,20 @@ class TableBucketingTestCase extends QueryTest with BeforeAndAfterAll {
     sql("DROP TABLE IF EXISTS table_bucket")
   }
 
+  test("test bucketing with column not present in the table") {
+    sql("DROP TABLE IF EXISTS table_bucket")
+    val ex = intercept[AnalysisException] {
+      sql(
+        """
+          CREATE TABLE IF NOT EXISTS table_bucket
+          (ID Int, date DATE, starttime Timestamp, country String,
+          name String, phonetype String, serialname String, salary Int)
+          STORED AS carbondata TBLPROPERTIES ('BUCKET_NUMBER'='2', 'BUCKET_COLUMNS'='abc')
+        """)
+    }
+    assert(ex.message.contains("Bucket field is not present in table columns"))
+  }
+
   test("test IUD of bucket table") {
     sql("CREATE TABLE t40 (ID Int, date Timestamp, country String, name String, phonetype String," +
       "serialname String, salary Int) STORED AS carbondata TBLPROPERTIES " +
diff --git a/integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala b/integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala
index 6e60c453a78..5848eb70c6a 100644
--- a/integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala
+++ b/integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala
@@ -19,11 +19,15 @@ package org.apache.spark.carbondata.restructure.vectorreader
 
 import java.math.BigDecimal
 
+import scala.collection.JavaConverters._
+
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.test.util.QueryTest
 import org.scalatest.BeforeAndAfterAll
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.metadata.CarbonMetadata
+import org.apache.carbondata.core.statusmanager.SegmentStatusManager
 
 class ChangeDataTypeTestCases extends QueryTest with BeforeAndAfterAll {
 
@@ -103,6 +107,56 @@ class ChangeDataTypeTestCases extends QueryTest with BeforeAndAfterAll {
     test_change_int_and_load()
   }
 
+  test("test change int datatype of range column and load data") {
+    beforeAll
+    sql("CREATE TABLE changedatatypetest (ID Int, date Timestamp, country String," +
+        " name String, phonetype String, serialname String, salary Int) STORED AS carbondata" +
+        " TBLPROPERTIES ('range_column'='salary')")
+    sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE changedatatypetest")
+    sql("ALTER TABLE changedatatypetest CHANGE salary salary BIGINT")
+    sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE changedatatypetest")
+    checkAnswer(sql("SELECT count(*) FROM changedatatypetest"), Row(200))
+    afterAll
+  }
+
+  test("test change int datatype for bucket column and load data") {
+    beforeAll
+    sql("CREATE TABLE changedatatypetest (ID Int, date Timestamp, country String," +
+        " name String, phonetype String, serialname String, salary Int) STORED AS carbondata" +
+        " TBLPROPERTIES ('BUCKET_NUMBER'='10', 'BUCKET_COLUMNS'='salary')")
+    sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE changedatatypetest")
+    sql("ALTER TABLE changedatatypetest CHANGE salary salary BIGINT")
+    sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE changedatatypetest")
+    checkAnswer(sql("SELECT count(*) FROM changedatatypetest"), Row(200))
+    afterAll
+  }
+
+  test("perform compaction after altering datatype of bucket column") {
+    beforeAll
+    sql("CREATE TABLE changedatatypetest (ID Int, date Timestamp, country String," +
+        " name String, phonetype String, serialname String, salary Int) STORED AS carbondata" +
+        " TBLPROPERTIES ('BUCKET_NUMBER'='10', 'BUCKET_COLUMNS'='salary')")
+    sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE changedatatypetest")
+    sql("ALTER TABLE changedatatypetest CHANGE salary salary BIGINT")
+    sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE changedatatypetest")
+    sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE changedatatypetest")
+    sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE changedatatypetest")
+    sql("ALTER TABLE changedatatypetest COMPACT 'minor'").collect()
+    checkAnswer(sql("SELECT count(*) FROM changedatatypetest"), Row(400))
+    val carbonTable = CarbonMetadata.getInstance().getCarbonTable(
+      CarbonCommonConstants.DATABASE_DEFAULT_NAME, "changedatatypetest")
+    val absoluteTableIdentifier = carbonTable.getAbsoluteTableIdentifier
+    val segmentStatusManager: SegmentStatusManager =
+      new SegmentStatusManager(absoluteTableIdentifier)
+    val segments = segmentStatusManager.getValidAndInvalidSegments.getValidSegments.asScala
+      .map(_.getSegmentNo)
+      .toList
+    assert(segments.contains("0.1"))
+    assert(!segments.contains("0"))
+    assert(!segments.contains("1"))
+    afterAll
+  }
+
   test("test change decimal datatype and compaction") {
     def test_change_decimal_and_compaction(): Unit = {
       beforeAll