From 5dd56c4f4c6ca8300eba62ca4001405d7bf9e12a Mon Sep 17 00:00:00 2001
From: Karan980
Date: Fri, 11 Dec 2020 00:06:00 +0530
Subject: [PATCH] [WIP] UT Improvement for load/compaction
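
Added the following test cases:

1. GeoTest: a spatial index column is rejected by the
   LOCAL_DICTIONARY_INCLUDE and LONG_STRING_COLUMNS options, both in
   CREATE TABLE and in ALTER TABLE SET TBLPROPERTIES.
2. TestCreateTableLike: CREATE TABLE LIKE a table with a spatial
   index copies the table properties.
3. CarbonDataSourceSuite: assert on the error message when a complex
   data type column is used as a bucket column.
4. TableBucketingTestCase: creating a bucket table on a column that
   is not present in the table fails with an AnalysisException.
5. ChangeDataTypeTestCases: data load after changing the datatype of
   a range column and of a bucket column, and minor compaction after
   changing the datatype of a bucket column, verifying the valid
   segment list.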
---
 .../org/apache/carbondata/geo/GeoTest.scala   | 24 +++++++++
 .../createTable/TestCreateTableLike.scala     | 23 ++++++++
 .../carbondata/CarbonDataSourceSuite.scala    |  3 +-
 .../bucketing/TableBucketingTestCase.scala    | 16 +++++-
 .../ChangeDataTypeTestCases.scala             | 54 +++++++++++++++++++
 5 files changed, 118 insertions(+), 2 deletions(-)

diff --git a/integration/spark/src/test/scala/org/apache/carbondata/geo/GeoTest.scala b/integration/spark/src/test/scala/org/apache/carbondata/geo/GeoTest.scala
index fa383f32b0c..6baa25a8f3e 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/geo/GeoTest.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/geo/GeoTest.scala
@@ -123,6 +123,18 @@ class GeoTest extends QueryTest with BeforeAndAfterAll with BeforeAndAfterEach {
     assert(exception.getMessage.contains(
       s"mygeohash is a spatial index column and is not allowed for " +
       s"the option(s): bucket_columns"))
+
+    exception = intercept[MalformedCarbonCommandException](
+      createTable(table1, " 'LOCAL_DICTIONARY_INCLUDE'='mygeohash', "))
+    assert(exception.getMessage.contains(
+      s"mygeohash is a spatial index column and is not allowed for " +
+      s"the option(s): local_dictionary_include"))
+
+    exception = intercept[MalformedCarbonCommandException](
+      createTable(table1, " 'LONG_STRING_COLUMNS'='mygeohash', "))
+    assert(exception.getMessage.contains(
+      s"mygeohash is a spatial index column and is not allowed for " +
+      s"the option(s): long_string_columns"))
   }
 
   test("test alter table with invalid table properties") {
@@ -145,6 +157,18 @@ class GeoTest extends QueryTest with BeforeAndAfterAll with BeforeAndAfterEach {
     assert(exception.getMessage.contains(
       s"mygeohash is a spatial index column and is not allowed for " +
       s"the option(s): column_meta_cache"))
+
+    exception = intercept[RuntimeException](
+      sql(s"ALTER TABLE $table1 SET TBLPROPERTIES('LOCAL_DICTIONARY_INCLUDE' = 'mygeohash')"))
+    assert(exception.getMessage.contains(
+      s"mygeohash is a spatial index column and is not allowed for " +
+      s"the option(s): local_dictionary_include"))
+
+    exception = intercept[RuntimeException](
+      sql(s"ALTER TABLE $table1 SET TBLPROPERTIES('LONG_STRING_COLUMNS' = 'mygeohash')"))
+    assert(exception.getMessage.contains(
+      s"mygeohash is a spatial index column and is not allowed for " +
+      s"the option(s): long_string_columns"))
   }
 
   test("test materialized view with spatial column") {
diff --git a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateTableLike.scala b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateTableLike.scala
index 8d2d9bfb2bf..56f94d75c3b 100644
--- a/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateTableLike.scala
+++ b/integration/spark/src/test/scala/org/apache/carbondata/spark/testsuite/createTable/TestCreateTableLike.scala
@@ -204,4 +204,27 @@ class TestCreateTableLike extends QueryTest with BeforeAndAfterEach with BeforeAndAfterAll {
     checkTableProperties(TableIdentifier("sourceTable"), TableIdentifier("targetTable"))
   }
 
+  test("test create table like with geoTable") {
+    sql("drop table if exists geoTable")
+    sql(s"""
+           | CREATE TABLE geoTable(
+           | timevalue BIGINT,
+           | longitude LONG,
+           | latitude LONG) COMMENT "This is a GeoTable"
+           | STORED AS carbondata
+           | TBLPROPERTIES ('SPATIAL_INDEX'='mygeohash',
+           | 'SPATIAL_INDEX.mygeohash.type'='geohash',
+           | 'SPATIAL_INDEX.mygeohash.sourcecolumns'='longitude, latitude',
+           | 'SPATIAL_INDEX.mygeohash.originLatitude'='39.832277',
+           | 'SPATIAL_INDEX.mygeohash.gridSize'='50',
+           | 'SPATIAL_INDEX.mygeohash.minLongitude'='115.811865',
+           | 'SPATIAL_INDEX.mygeohash.maxLongitude'='116.782233',
+           | 'SPATIAL_INDEX.mygeohash.minLatitude'='39.832277',
+           | 'SPATIAL_INDEX.mygeohash.maxLatitude'='40.225281',
+           | 'SPATIAL_INDEX.mygeohash.conversionRatio'='1000000')
+       """.stripMargin)
+    sql("create table targetTable like geoTable")
+    checkTableProperties(TableIdentifier("geoTable"), TableIdentifier("targetTable"))
+  }
+
 }
diff --git a/integration/spark/src/test/scala/org/apache/spark/carbondata/CarbonDataSourceSuite.scala b/integration/spark/src/test/scala/org/apache/spark/carbondata/CarbonDataSourceSuite.scala
index b9e94cfca6c..51a58dd6fa1 100644
--- a/integration/spark/src/test/scala/org/apache/spark/carbondata/CarbonDataSourceSuite.scala
+++ b/integration/spark/src/test/scala/org/apache/spark/carbondata/CarbonDataSourceSuite.scala
@@ -238,11 +238,12 @@ class CarbonDataSourceSuite extends QueryTest with BeforeAndAfterAll {
 
   test("test to create bucket columns with complex data type field") {
     sql("drop table if exists create_source")
-    intercept[Exception] {
+    val ex = intercept[Exception] {
       sql("create table create_source(intField int, stringField string, " +
         "complexField array<string>) USING carbondata " +
         "OPTIONS('bucket_number'='1', 'bucket_columns'='complexField')")
     }
+    assert(ex.getMessage.contains("Bucket field should not be complex column or decimal data type"))
   }
 
   test("test check results of table with complex data type and bucketing") {
diff --git a/integration/spark/src/test/scala/org/apache/spark/carbondata/bucketing/TableBucketingTestCase.scala b/integration/spark/src/test/scala/org/apache/spark/carbondata/bucketing/TableBucketingTestCase.scala
index edd7c9e47ff..70af572057e 100644
--- a/integration/spark/src/test/scala/org/apache/spark/carbondata/bucketing/TableBucketingTestCase.scala
+++ b/integration/spark/src/test/scala/org/apache/spark/carbondata/bucketing/TableBucketingTestCase.scala
@@ -20,7 +20,7 @@ package org.apache.spark.carbondata.bucketing
 import java.sql.Date
 import java.text.SimpleDateFormat
 
-import org.apache.spark.sql.{CarbonEnv, Row}
+import org.apache.spark.sql.{AnalysisException, CarbonEnv, Row}
 import org.apache.spark.sql.execution.WholeStageCodegenExec
 import org.apache.spark.sql.execution.exchange.Exchange
 import org.apache.spark.sql.execution.joins.SortMergeJoinExec
@@ -139,6 +139,20 @@ class TableBucketingTestCase extends QueryTest with BeforeAndAfterAll {
     sql("DROP TABLE IF EXISTS table_bucket")
   }
 
+  test("test bucketing with column not present in the table") {
+    sql("DROP TABLE IF EXISTS table_bucket")
+    val ex = intercept[AnalysisException] {
+      sql(
+        """
+          CREATE TABLE IF NOT EXISTS table_bucket
+          (ID Int, date DATE, starttime Timestamp, country String,
+          name String, phonetype String, serialname String, salary Int)
+          STORED AS carbondata TBLPROPERTIES ('BUCKET_NUMBER'='2', 'BUCKET_COLUMNS'='abc')
+        """)
+    }
+    assert(ex.message.contains("Bucket field is not present in table columns"))
+  }
+
   test("test IUD of bucket table") {
     sql("CREATE TABLE t40 (ID Int, date Timestamp, country String, name String, phonetype String," +
       "serialname String, salary Int) STORED AS carbondata TBLPROPERTIES " +
diff --git a/integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala b/integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala
index 6e60c453a78..5848eb70c6a 100644
--- a/integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala
+++ b/integration/spark/src/test/scala/org/apache/spark/carbondata/restructure/vectorreader/ChangeDataTypeTestCases.scala
@@ -19,11 +19,15 @@ package org.apache.spark.carbondata.restructure.vectorreader
 
 import java.math.BigDecimal
 
+import scala.collection.JavaConverters._
+
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.test.util.QueryTest
 import org.scalatest.BeforeAndAfterAll
 
 import org.apache.carbondata.core.constants.CarbonCommonConstants
+import org.apache.carbondata.core.metadata.CarbonMetadata
+import org.apache.carbondata.core.statusmanager.SegmentStatusManager
 
 class ChangeDataTypeTestCases extends QueryTest with BeforeAndAfterAll {
 
@@ -103,6 +107,56 @@ class ChangeDataTypeTestCases extends QueryTest with BeforeAndAfterAll {
     test_change_int_and_load()
   }
 
+  test("test change int datatype of range column and load data") {
+    beforeAll
+    sql("CREATE TABLE changedatatypetest (ID Int, date Timestamp, country String," +
+        " name String, phonetype String, serialname String, salary Int) STORED AS carbondata" +
+        " TBLPROPERTIES ('range_column'='salary')")
+    sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE changedatatypetest")
+    sql("ALTER TABLE changedatatypetest CHANGE salary salary BIGINT")
+    sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE changedatatypetest")
+    checkAnswer(sql("SELECT count(*) FROM changedatatypetest"), Row(200))
+    afterAll
+  }
+
+  test("test change int datatype for bucket column and load data") {
+    beforeAll
+    sql("CREATE TABLE changedatatypetest (ID Int, date Timestamp, country String," +
+        " name String, phonetype String, serialname String, salary Int) STORED AS carbondata" +
+        " TBLPROPERTIES ('BUCKET_NUMBER'='10', 'BUCKET_COLUMNS'='salary')")
+    sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE changedatatypetest")
+    sql("ALTER TABLE changedatatypetest CHANGE salary salary BIGINT")
+    sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE changedatatypetest")
+    checkAnswer(sql("SELECT count(*) FROM changedatatypetest"), Row(200))
+    afterAll
+  }
+
+  test("perform compaction after altering datatype of bucket column") {
+    beforeAll
+    sql("CREATE TABLE changedatatypetest (ID Int, date Timestamp, country String," +
+        " name String, phonetype String, serialname String, salary Int) STORED AS carbondata" +
+        " TBLPROPERTIES ('BUCKET_NUMBER'='10', 'BUCKET_COLUMNS'='salary')")
+    sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE changedatatypetest")
+    sql("ALTER TABLE changedatatypetest CHANGE salary salary BIGINT")
+    sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE changedatatypetest")
+    sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE changedatatypetest")
+    sql(s"LOAD DATA INPATH '$resourcesPath/source.csv' INTO TABLE changedatatypetest")
+    sql("ALTER TABLE changedatatypetest COMPACT 'minor'").collect()
+    checkAnswer(sql("SELECT count(*) FROM changedatatypetest"), Row(400))
+    val carbonTable = CarbonMetadata.getInstance().getCarbonTable(
+      CarbonCommonConstants.DATABASE_DEFAULT_NAME, "changedatatypetest")
+    val absoluteTableIdentifier = carbonTable.getAbsoluteTableIdentifier
+    val segmentStatusManager: SegmentStatusManager =
+      new SegmentStatusManager(absoluteTableIdentifier)
+    val segments = segmentStatusManager.getValidAndInvalidSegments.getValidSegments.asScala
+      .map(_.getSegmentNo)
+      .toList
+    assert(segments.contains("0.1"))
+    assert(!segments.contains("0"))
+    assert(!segments.contains("1"))
+    afterAll
+  }
+
   test("test change decimal datatype and compaction") {
     def test_change_decimal_and_compaction(): Unit = {
       beforeAll