From 992815111b55020e2d67fc35ae49e44fa5f3e8cc Mon Sep 17 00:00:00 2001
From: William Allen <16820599+williamjallen@users.noreply.github.com>
Date: Sun, 10 Sep 2023 21:39:13 -0400
Subject: [PATCH] [Bugfix:Plagiarism] Bump base Docker image to Ubuntu 22.04 (#104)

### What is the current behavior?

Our Docker image is currently based on Ubuntu 20.04. This is problematic
because the main Lichen binary is compiled on the host machine and copied
into the Lichen container at runtime, which requires both operating systems
to be compatible. Eventually, we should do all of the compilation at image
build time instead...

### What is the new behavior?

The base Docker image has been bumped to Ubuntu 22.04. Due to a handful of
version incompatibilities and other miscellaneous Lichen bugs, I also bumped
Clang to version 14 (the C++ tokenizer was broken), bumped Python to 3.9, and
fixed a regression introduced in https://github.com/Submitty/Submitty/pull/9630.

Further Python version increases will break the Java tokenizer. I will open
a separate PR to migrate away from `javac-parser`, which has not been updated
in five years and appears to be a dead project.
---
 .github/workflows/lichen_ci.yml                       |  8 ++++----
 .github/workflows/pylint.yml                          |  2 +-
 Dockerfile                                            |  6 +++---
 bin/concatenate_all.py                                | 12 ++++++------
 compare_hashes/compare_hashes.cpp                     |  4 ++--
 compare_hashes/lichen_config.h                        |  2 +-
 install_lichen.sh                                     |  2 +-
 .../multiple_versions/expected_output/config.json     |  2 +-
 .../test_lichen/multiple_versions/input/config.json   |  2 +-
 .../repeated_sequences/expected_output/config.json    |  2 +-
 .../test_lichen/repeated_sequences/input/config.json  |  2 +-
 tokenizer/c/c_tokenizer.py                            |  6 ++----
 12 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/.github/workflows/lichen_ci.yml b/.github/workflows/lichen_ci.yml
index 2603b7c..792f2e4 100644
--- a/.github/workflows/lichen_ci.yml
+++ b/.github/workflows/lichen_ci.yml
@@ -3,11 +3,11 @@ name: Lichen CI
 on: [push, pull_request]
 
 env:
-  PYTHON_VERSION: 3.8
+  PYTHON_VERSION: '3.9'
 
 jobs:
   python-unit-tests:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     steps:
       - uses: actions/checkout@v2
       - uses: actions/setup-python@v2
@@ -19,14 +19,14 @@ jobs:
       - name: Install Tokenizer Dependencies
         run: |
           sudo apt-get update
-          sudo apt-get install -y clang-6.0
+          sudo apt-get install -y clang-14
       - name: Run Unit Tests
         run: |
           cd tests/unittest
           python3 -m unittest discover
 
   test-lichen-integration:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     steps:
       - uses: actions/checkout@v2
       - name: Install Lichen
diff --git a/.github/workflows/pylint.yml b/.github/workflows/pylint.yml
index 134c7f0..82d149e 100644
--- a/.github/workflows/pylint.yml
+++ b/.github/workflows/pylint.yml
@@ -4,7 +4,7 @@ on: [push]
 
 jobs:
   python-lint:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04
     steps:
       - uses: actions/checkout@v2
       - uses: actions/setup-python@v2
diff --git a/Dockerfile b/Dockerfile
index 9d3797a..ce82ccb 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM ubuntu:20.04
+FROM ubuntu:22.04
 
 ARG DEBIAN_FRONTEND=noninteractive
 
@@ -6,9 +6,9 @@ ARG DEBIAN_FRONTEND=noninteractive
 RUN apt-get update \
     && apt-get install -y \
     libboost-all-dev \
-    python3.8 \
+    python3.9 \
     python3-pip \
-    clang-6.0 \
+    clang-14 \
     default-jdk
 
 # Python Dependencies
diff --git a/bin/concatenate_all.py b/bin/concatenate_all.py
index 63f50f9..c1cbf4c 100644
--- a/bin/concatenate_all.py
+++ b/bin/concatenate_all.py
@@ -176,10 +176,10 @@ def validate(config, args):
     # check permissions to make sure we have access to the other gradeables
     my_course_group_perms = Path(args.basepath).group()
     for gradeable in other_gradeables:
-        if Path(args.datapath, gradeable["other_semester"], gradeable["other_course"]).group()\
+        if Path(args.datapath, gradeable["other_term"], gradeable["other_course"]).group()\
                 != my_course_group_perms:
             raise SystemExit("ERROR: Invalid permissions to access course "
-                             f"{gradeable['other_semester']}/{gradeable['other_course']}")
+                             f"{gradeable['other_term']}/{gradeable['other_course']}")
 
     # check permissions for each path we are given (if any are provided)
     if config.get("other_gradeable_paths") is not None:
@@ -211,7 +211,7 @@ def main():
     validate(config, args)
 
     # parameters to be used in this file
-    semester = config["semester"]
+    term = config["term"]
     course = config["course"]
     gradeable = config["gradeable"]
     regex_patterns = config["regex"]
@@ -225,7 +225,7 @@ def main():
 
     total_concat = 0
     for dir in regex_dirs:
-        input_path = os.path.join(args.datapath, semester, course, dir, gradeable)
+        input_path = os.path.join(args.datapath, term, course, dir, gradeable)
         output_path = os.path.join(args.basepath, "users")
         total_concat = processGradeable(args.basepath, config, input_path,
                                         output_path, total_concat)
@@ -235,13 +235,13 @@ def main():
 
     for other_gradeable in other_gradeables:
         for dir in regex_dirs:
             input_path = os.path.join(args.datapath,
-                                      other_gradeable["other_semester"],
+                                      other_gradeable["other_term"],
                                       other_gradeable["other_course"],
                                       dir,
                                       other_gradeable["other_gradeable"])
             output_path = os.path.join(args.basepath, "other_gradeables",
-                                       f"{other_gradeable['other_semester']}__{other_gradeable['other_course']}__{other_gradeable['other_gradeable']}")  # noqa: E501
+                                       f"{other_gradeable['other_term']}__{other_gradeable['other_course']}__{other_gradeable['other_gradeable']}")  # noqa: E501
             total_concat = processGradeable(args.basepath, config, input_path,
                                             output_path, total_concat)
 
diff --git a/compare_hashes/compare_hashes.cpp b/compare_hashes/compare_hashes.cpp
index d2867b1..44c7ea1 100644
--- a/compare_hashes/compare_hashes.cpp
+++ b/compare_hashes/compare_hashes.cpp
@@ -103,7 +103,7 @@ int main(int argc, char* argv[]) {
   assert(istr.good());
   nlohmann::json config_file_json = nlohmann::json::parse(istr);
 
-  config.semester = config_file_json.value("semester", "ERROR");
+  config.term = config_file_json.value("term", "ERROR");
   config.course = config_file_json.value("course", "ERROR");
   config.gradeable = config_file_json.value("gradeable", "ERROR");
   config.hash_size = config_file_json.value("hash_size", 1);
@@ -218,7 +218,7 @@ int main(int argc, char* argv[]) {
     while (istr >> input_hash_str) {
       hash input_hash = (unsigned int)(stoul(input_hash_str, 0, 16));
       location++;
-      all_hashes[input_hash][username].push_back(HashLocation(username, version, location, config.semester + "__" + config.course + "__" + config.gradeable));
+      all_hashes[input_hash][username].push_back(HashLocation(username, version, location, config.term + "__" + config.course + "__" + config.gradeable));
       curr_submission->addHash(input_hash, location);
     }
 
diff --git a/compare_hashes/lichen_config.h b/compare_hashes/lichen_config.h
index 03c54cb..09e9c2e 100644
--- a/compare_hashes/lichen_config.h
+++ b/compare_hashes/lichen_config.h
@@ -2,7 +2,7 @@
 #define LICHEN_CONFIG_H
 
 struct LichenConfig {
-  std::string semester;
+  std::string term;
   std::string course;
   std::string gradeable;
   int hash_size;
diff --git a/install_lichen.sh b/install_lichen.sh
index 801eade..1f0bc34 100755
--- a/install_lichen.sh
+++ b/install_lichen.sh
@@ -20,7 +20,7 @@ cp -r "$lichen_repository_dir"/* "$lichen_installation_dir"
 
 # install C++ dependencies
 apt-get update
-apt-get install -y clang-6.0 libboost-all-dev
+apt-get install -y clang-14 libboost-all-dev
 
 ####################################################################################################
 # Install Python Dependencies locally (for concatenation)
diff --git a/tests/data/test_lichen/multiple_versions/expected_output/config.json b/tests/data/test_lichen/multiple_versions/expected_output/config.json
index 9687866..e1de577 100644
--- a/tests/data/test_lichen/multiple_versions/expected_output/config.json
+++ b/tests/data/test_lichen/multiple_versions/expected_output/config.json
@@ -1,5 +1,5 @@
 {
-    "semester": "f21",
+    "term": "f21",
     "course": "plagiarism",
     "gradeable": "multiple_versions",
     "config_id": 1,
diff --git a/tests/data/test_lichen/multiple_versions/input/config.json b/tests/data/test_lichen/multiple_versions/input/config.json
index 9687866..e1de577 100644
--- a/tests/data/test_lichen/multiple_versions/input/config.json
+++ b/tests/data/test_lichen/multiple_versions/input/config.json
@@ -1,5 +1,5 @@
 {
-    "semester": "f21",
+    "term": "f21",
     "course": "plagiarism",
     "gradeable": "multiple_versions",
     "config_id": 1,
diff --git a/tests/data/test_lichen/repeated_sequences/expected_output/config.json b/tests/data/test_lichen/repeated_sequences/expected_output/config.json
index 700cc3a..af54b5b 100644
--- a/tests/data/test_lichen/repeated_sequences/expected_output/config.json
+++ b/tests/data/test_lichen/repeated_sequences/expected_output/config.json
@@ -1,5 +1,5 @@
 {
-    "semester": "f21",
+    "term": "f21",
     "course": "plagiarism",
     "gradeable": "repeated_sequences",
     "config_id": "1",
diff --git a/tests/data/test_lichen/repeated_sequences/input/config.json b/tests/data/test_lichen/repeated_sequences/input/config.json
index 700cc3a..af54b5b 100644
--- a/tests/data/test_lichen/repeated_sequences/input/config.json
+++ b/tests/data/test_lichen/repeated_sequences/input/config.json
@@ -1,5 +1,5 @@
 {
-    "semester": "f21",
+    "term": "f21",
     "course": "plagiarism",
     "gradeable": "repeated_sequences",
     "config_id": "1",
diff --git a/tokenizer/c/c_tokenizer.py b/tokenizer/c/c_tokenizer.py
index 37ffba6..4a8bca6 100644
--- a/tokenizer/c/c_tokenizer.py
+++ b/tokenizer/c/c_tokenizer.py
@@ -24,10 +24,8 @@ def main():
     # copy the concatenated file to the temporary file location
     shutil.copy(args.input_file, tmp_cpp_file_name)
 
-    if (os.path.isfile('/usr/lib/llvm-6.0/lib/libclang.so.1')):
-        clang.cindex.Config.set_library_file('/usr/lib/llvm-6.0/lib/libclang.so.1')
-    elif (os.path.isfile('/usr/lib/llvm-3.8/lib/libclang-3.8.so.1')):
-        clang.cindex.Config.set_library_file('/usr/lib/llvm-3.8/lib/libclang-3.8.so.1')
+    if (os.path.isfile('/usr/lib/llvm-14/lib/libclang.so.1')):
+        clang.cindex.Config.set_library_file('/usr/lib/llvm-14/lib/libclang.so.1')
 
     idx = clang.cindex.Index.create()
     # parse the input file