Skip to content

Commit 86e0cd0

Browse files
Implemented setup of generic ground truth extraction that does not care about the build system and only extracts the presence conditions in the context of a file's ifdefs. The extraction should now be possible for any C/C++ project
1 parent a48b764 commit 86e0cd0

12 files changed

Lines changed: 116 additions & 100 deletions

Dockerfile

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,12 @@ RUN apk add maven git
66

77
WORKDIR /home/user
88

9-
RUN git clone --progress https://git.busybox.net/busybox/
10-
RUN git clone --progress https://github.com/torvalds/linux.git
11-
129
COPY local-maven-repo ./local-maven-repo
1310
COPY src ./src
1411
COPY pom.xml .
1512
RUN mvn package || exit
1613

1714

18-
1915
FROM ubuntu:20.04
2016

2117
# Create a user
@@ -56,16 +52,13 @@ COPY docker-resources/fix-perms.sh /home/user/
5652
COPY docker-resources/KernelHaven.jar /home/user/
5753
COPY docker-resources/extraction_busybox.properties /home/user/
5854
COPY docker-resources/extraction_linux.properties /home/user/
55+
COPY docker-resources/extraction_generic.properties /home/user/
5956

6057
RUN mkdir -p /home/user/extraction-results/output
6158
RUN chown user:user /home/user -R
6259
RUN chmod +x entrypoint.sh
6360
RUN chmod +x fix-perms.sh
6461
RUN chmod +x extract.sh
6562

66-
# Copy repositories from previous stage
67-
COPY --from=0 /home/user/linux /home/user/linux
68-
COPY --from=0 /home/user/busybox /home/user/busybox
69-
7063
ENTRYPOINT ["./entrypoint.sh", "./extract.sh"]
7164
USER user

docker-resources/extract.sh

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,27 +13,53 @@ then
1313
echo "Executing variability extraction of BusyBox."
1414
if [ $# == 1 ]
1515
then
16-
java -jar Extraction-1.0.0-jar-with-dependencies.jar extraction_busybox.properties
16+
java -jar Extraction-1.0.0-jar-with-dependencies.jar extraction_busybox.properties "$1"
1717
elif [ $# == 2 ]
1818
then
19-
java -jar Extraction-1.0.0-jar-with-dependencies.jar extraction_busybox.properties "$2"
19+
java -jar Extraction-1.0.0-jar-with-dependencies.jar extraction_busybox.properties "$1" "$2"
2020
elif [ $# == 3 ]
2121
then
22-
java -jar Extraction-1.0.0-jar-with-dependencies.jar extraction_busybox.properties "$2" "$3"
22+
java -jar Extraction-1.0.0-jar-with-dependencies.jar extraction_busybox.properties "$1" "$2" "$3"
2323
fi
2424
elif [ "$1" == 'linux' ]
2525
then
2626
echo "Executing variability extraction of Linux."
2727
if [ $# == 1 ]
2828
then
29-
java -jar Extraction-1.0.0-jar-with-dependencies.jar extraction_linux.properties
29+
java -jar Extraction-1.0.0-jar-with-dependencies.jar extraction_linux.properties "$1"
3030
elif [ $# == 2 ]
3131
then
32-
java -jar Extraction-1.0.0-jar-with-dependencies.jar extraction_linux.properties "$2"
32+
java -jar Extraction-1.0.0-jar-with-dependencies.jar extraction_linux.properties "$1" "$2"
3333
elif [ $# == 3 ]
3434
then
35-
java -jar Extraction-1.0.0-jar-with-dependencies.jar extraction_linux.properties "$2" "$3"
35+
java -jar Extraction-1.0.0-jar-with-dependencies.jar extraction_linux.properties "$1" "$2" "$3"
3636
fi
37+
elif [ "$1" == '--help' ]
38+
then
39+
echo "You can start the extraction by providing the clone link to a git repository and optionally a commit or range of commits."
40+
echo "You can also specify 'linux'|'busybox' for a detailed extraction of additional build system information
41+
(i.e., file conditions and feature model) for Linux|BusyBox."
42+
echo "Examples:"
43+
echo "# Process linux versions v4.2 - v4.3"
44+
echo "./start-extraction.sh linux v4.2 v4.3"
45+
echo "# Process only linux version v4.7"
46+
echo "./start-extraction.sh linux v4.7"
47+
echo "# Process the entire history of busybox"
48+
echo "./start-extraction.sh busybox"
49+
echo "# Process the entire history of any repo (without build information)"
50+
echo "./start-extraction.sh https://github.com/OTHER_REPO.git"
51+
echo "# Process a specific commit of any repo (without build information)"
52+
echo "./start-extraction.sh https://github.com/OTHER_REPO.git COMMIT_ID"
3753
else
38-
echo "Select a SPL to extract from [ linux | busybox ]"
54+
echo "Executing variability extraction of $1."
55+
if [ $# == 1 ]
56+
then
57+
java -jar Extraction-1.0.0-jar-with-dependencies.jar extraction_generic.properties "$1"
58+
elif [ $# == 2 ]
59+
then
60+
java -jar Extraction-1.0.0-jar-with-dependencies.jar extraction_generic.properties "$1" "$2"
61+
elif [ $# == 3 ]
62+
then
63+
java -jar Extraction-1.0.0-jar-with-dependencies.jar extraction_generic.properties "$1" "$2" "$3"
64+
fi
3965
fi

docker-resources/extraction_busybox.properties

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,6 @@
22
# Common Extractor Parameters #
33
#####################################
44

5-
# Path to BusyBox sources, change this if you have cloned the repo to a different location!
6-
source_tree = ./busybox
7-
# This url is only required if the repo has not been cloned yet and should be cloned to the directory specified above
8-
source_repo_url = https://git.busybox.net/busybox/
9-
105
# The architecture for which BusyBox is supposed to be analyzed, you do not have to change this value. If it is changed,
116
# the variability is extracted for files that are associated with a different architecture. See the contents of
127
# BUSYBOX_DIR/arch for possible values.
@@ -47,8 +42,12 @@ analysis.code_block.consider_missing_bm_infos = true
4742
#######################################
4843
#######################################
4944

45+
# Path to BusyBox sources
46+
source_tree = ./busybox
47+
source_repo_url = https://git.busybox.net/busybox/
48+
5049
# Do not change this
51-
analysis.class = org.variantsync.vevos.extraction.kh.VariabilityAnalysis
50+
analysis.class = org.variantsync.vevos.extraction.kh.FullAnalysis
5251
preparation.class.0 = net.ssehub.kernel_haven.busyboot.PrepareBusybox
5352
analysis.output.type = csv
5453

docker-resources/extraction_generic.properties

Lines changed: 12 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,10 @@
22
# Common Extractor Parameters #
33
#####################################
44

5-
# Path to BusyBox sources, change this if you have cloned the repo to a different location!
6-
source_tree = ./busybox
7-
# This url is only required if the repo has not been cloned yet and should be cloned to the directory specified above
8-
source_repo_url = https://git.busybox.net/busybox/
9-
10-
# The architecture for which BusyBOx is supposed to be analyzed, you do not have to change this value. If it is changed,
11-
# the variability is extracted for files that are associated with a different architecture. See the contents of
12-
# BUSYBOX_DIR/arch for possible values.
13-
arch = i386
14-
155
### Logging ###
166
# Possible values: INFO, STATUS, DEBUG, WARNING, ERROR
177
# Log level used by KernelHaven
18-
log.level = INFO
8+
log.level = DEBUG
199
# Flag whether the KernelHaven log should be printed to the terminal
2010
log.console = false
2111
# Log level used by the VariabilityExtraction
@@ -33,10 +23,6 @@ analysis.number_of_tasks = 5
3323
# in case of missing build model information (i.e., no feature model or file condition)
3424
analysis.code_block.consider_missing_bm_infos = true
3525

36-
# The folders which are considered by KbuildMiner can be specified manually with the following property. Normally,
37-
# they are determined automatically, which we consider to be the best option in most cases
38-
# build.extractor.top_folders = arch/x86,block,crypto,drivers,fs,init,ipc,kernel,lib,mm,net,security,sound
39-
4026
#######################################
4127
#######################################
4228
# WARNING #
@@ -47,9 +33,14 @@ analysis.code_block.consider_missing_bm_infos = true
4733
#######################################
4834
#######################################
4935

36+
arch =
37+
38+
# Path to sources
39+
source_tree = TBD
40+
source_repo_url = TBD
41+
5042
# Do not change this
51-
analysis.class = org.variantsync.vevos.extraction.kh.VariabilityAnalysis
52-
preparation.class.0 = net.ssehub.kernel_haven.busyboot.PrepareBusybox
43+
analysis.class = org.variantsync.vevos.extraction.kh.PartialAnalysis
5344
analysis.output.type = csv
5445

5546
######################################
@@ -75,29 +66,19 @@ code.provider.cache.read = false
7566
code.extractor.class = net.ssehub.kernel_haven.block_extractor.CodeBlockExtractor
7667
code.extractor.files =
7768
# CodeBlockExtractor parses header and code files separately
78-
code.extractor.file_regex = .*\\.(c|h)
69+
code.extractor.file_regex = .*\\.(c|h|cpp|hpp)
7970
code.extractor.threads = 10
8071
code.extractor.invalid_condition = TRUE
81-
code.extractor.handle_linux_macros = true
72+
code.extractor.handle_linux_macros = false
8273
code.extractor.fuzzy_parsing = true
8374

8475

8576
################################
8677
# Build Model Parameters #
8778
################################
88-
89-
build.provider.timeout = 0
90-
build.provider.cache.write = false
91-
build.provider.cache.read = false
92-
build.extractor.class = net.ssehub.kernel_haven.kbuildminer.KbuildMinerExtractor
93-
79+
# EMPTY as build model analysis is project-dependent
9480

9581
#######################################
9682
# Variability Model Parameters #
9783
#######################################
98-
99-
variability.provider.timeout = 0
100-
variability.provider.cache.write = true
101-
variability.provider.cache.read = false
102-
variability.extractor.class = net.ssehub.kernel_haven.kconfigreader.KconfigReaderExtractor
103-
variability.extractor.dumpconf_version = BUSYBOX
84+
# EMPTY as variability model analysis is project-dependent

docker-resources/extraction_linux.properties

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,6 @@
11
#####################################
22
# Common Extractor Parameters #
33
#####################################
4-
5-
# Path to linux sources, change this if you have cloned the repo to a different location!
6-
source_tree = ./linux
7-
# This url is only required if the repo has not been cloned yet and should be cloned to the directory specified above
8-
source_repo_url = https://github.com/torvalds/linux.git
9-
104
# The architecture for which linux is supposed to be analyzed, you do not have to change this value. If it is changed,
115
# the variability is extracted for files that are associated with a different architecture. See the contents of
126
# LINUX_DIR/arch for possible values.
@@ -47,8 +41,12 @@ analysis.code_block.consider_missing_bm_infos = true
4741
#######################################
4842
#######################################
4943

44+
# Path to linux sources
45+
source_tree = ./linux
46+
source_repo_url = https://github.com/torvalds/linux.git
47+
5048
# Do not change this
51-
analysis.class = org.variantsync.vevos.extraction.kh.VariabilityAnalysis
49+
analysis.class = org.variantsync.vevos.extraction.kh.FullAnalysis
5250
analysis.output.type = csv
5351

5452
######################################

src/main/java/org/variantsync/vevos/extraction/Extraction.java

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -56,27 +56,46 @@ public static void main(String... args) throws IOException, GitAPIException {
5656

5757
// Parse the arguments
5858
File propertiesFile = getPropertiesFile(args);
59+
String repo = args[1];
5960
String firstCommit = null;
6061
String lastCommit = null;
61-
if (args.length > 1) {
62-
firstCommit = args[1];
63-
if (args.length > 2) {
64-
lastCommit = args[2];
62+
if (args.length > 2) {
63+
firstCommit = args[2];
64+
if (args.length > 3) {
65+
lastCommit = args[3];
6566
}
6667
}
6768

6869
// Load the configuration
6970
Configuration config = getConfiguration(propertiesFile);
7071
LOGGER.setLevel(config.getValue(LOG_LEVEL_MAIN));
7172

73+
// Update the repo information if necessary
74+
if (config.getValue(PATH_TO_SOURCE_REPO).equals("TBD")) {
75+
LOGGER.logStatus("Expecting repo link as first argument...");
76+
LOGGER.logStatus("Provided repo link: " + repo);
77+
78+
String[] parts = repo.split("/");
79+
String repoName = parts[parts.length-1];
80+
repoName = repoName.split("\\.")[0];
81+
LOGGER.logStatus("Identified repo name: " + repoName);
82+
83+
config.setValue(PATH_TO_SOURCE_REPO, "./" + repoName);
84+
config.setValue(URL_OF_SOURCE_REPO, repo);
85+
}
86+
7287
// Clone the SPL if necessary and return the File that points to the directory
7388
File splDir = setUpSPLDirectory(config);
74-
// Create the directories for each task running the analysis
75-
File workingDirectory = setUpWorkingDirectory(config, splDir);
7689
// Load git history
7790
List<RevCommit> commits = GitUtil.getCommits(splDir, firstCommit, lastCommit);
91+
LOGGER.logStatus("Identified " + commits.size() + " commit(s) for processing.");
92+
93+
// Number of threads is the minimum of the specified number and the number of commits to process
94+
int numberOfThreads = Math.min(config.getValue(NUMBER_OF_THREADS),commits.size());
95+
96+
// Create the directories for each task running the analysis
97+
File workingDirectory = setUpWorkingDirectory(config, splDir, numberOfThreads);
7898

79-
int numberOfThreads = config.getValue(NUMBER_OF_THREADS);
8099
LOGGER.logStatus("Starting thread pool with " + numberOfThreads + " threads.");
81100
ExecutorService threadPool = Executors.newFixedThreadPool(numberOfThreads);
82101
LOGGER.logStatus("Splitting commits into " + numberOfThreads + " subset(s).");
@@ -163,8 +182,7 @@ private static File setUpSPLDirectory(Configuration config) {
163182
return splDir;
164183
}
165184

166-
private static File setUpWorkingDirectory(Configuration config, File splDir) {
167-
int numberOfThreads = config.getValue(NUMBER_OF_THREADS);
185+
private static File setUpWorkingDirectory(Configuration config, File splDir, int numberOfThreads) {
168186
File workingDirectory = new File(System.getProperty("user.dir"));
169187
workingDirectory = new File(workingDirectory, config.getValue(WORKING_DIR_NAME));
170188
LOGGER.logInfo("Working Directory: " + workingDirectory);

src/main/java/org/variantsync/vevos/extraction/kh/VariabilityAnalysis.java renamed to src/main/java/org/variantsync/vevos/extraction/kh/FullAnalysis.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,14 @@
77
import net.ssehub.kernel_haven.util.null_checks.NonNull;
88
import net.ssehub.kernel_haven.fe_analysis.pcs.CodeBlockAnalysis;
99

10-
public class VariabilityAnalysis extends PipelineAnalysis {
10+
public class FullAnalysis extends PipelineAnalysis {
1111

1212
/**
13-
* Creates a new {@link VariabilityAnalysis}.
13+
* Creates a new {@link FullAnalysis}.
1414
*
1515
* @param config The global configuration.
1616
*/
17-
public VariabilityAnalysis(@NonNull Configuration config) {
17+
public FullAnalysis(@NonNull Configuration config) {
1818
super(config);
1919
}
2020

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
package org.variantsync.vevos.extraction.kh;
2+
3+
import net.ssehub.kernel_haven.SetUpException;
4+
import net.ssehub.kernel_haven.analysis.AnalysisComponent;
5+
import net.ssehub.kernel_haven.analysis.PipelineAnalysis;
6+
import net.ssehub.kernel_haven.config.Configuration;
7+
import net.ssehub.kernel_haven.fe_analysis.pcs.CodeBlockAnalysis;
8+
import net.ssehub.kernel_haven.util.null_checks.NonNull;
9+
10+
public class PartialAnalysis extends PipelineAnalysis {
11+
12+
/**
13+
* Creates a new {@link PartialAnalysis}.
14+
*
15+
* @param config The global configuration.
16+
*/
17+
public PartialAnalysis(@NonNull Configuration config) {
18+
super(config);
19+
}
20+
21+
@Override
22+
protected @NonNull AnalysisComponent<?> createPipeline() throws SetUpException {
23+
return new CodeBlockAnalysis(config, getCmComponent(), null, null);
24+
}
25+
26+
}

src/main/resources/extraction_busybox.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ analysis.code_block.consider_missing_bm_infos = true
4949
#######################################
5050

5151
# Do not change this
52-
analysis.class = org.variantsync.vevos.extraction.kh.VariabilityAnalysis
52+
analysis.class = org.variantsync.vevos.extraction.kh.FullAnalysis
5353
preparation.class.0 = net.ssehub.kernel_haven.busyboot.PrepareBusybox
5454
analysis.output.type = csv
5555

src/main/resources/extraction_linux.properties

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ analysis.code_block.consider_missing_bm_infos = true
4949
#######################################
5050

5151
# Do not change this
52-
analysis.class = org.variantsync.vevos.extraction.kh.VariabilityAnalysis
52+
analysis.class = org.variantsync.vevos.extraction.kh.FullAnalysis
5353
analysis.output.type = csv
5454

5555
######################################

0 commit comments

Comments
 (0)