Skip to content

Commit 0671f7d

Browse files
Merge remote-tracking branch 'origin/main'
2 parents 16e67a4 + d78b8f4 commit 0671f7d

21 files changed

Lines changed: 119 additions & 129 deletions
Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,13 @@
1-
arrow
1+
activemq-nms-amqp
2+
activemq-nms-api
3+
activemq-nms-ems
4+
activemq-nms-msmq
5+
activemq-nms-openwire
6+
activemq-nms-stomp
7+
activemq-nms-xms
8+
logging-log4net
29
lucenenet
3-
log4net
4-
qpid
5-
thrift
6-
ignite
7-
cordova
8-
etch
9-
chemistry
10-
avro
11-
hadoop
12-
hbase
13-
hive
14-
kafka
10+
pulsar-dotpulsar
11+
qpid-proton-dotnet
12+
rocketmq-client-csharp
13+
usergrid-dotnet
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
brpc
2+
datasketches-cpp
3+
incubator-pegasus
4+
kudu
5+
kvrocks
6+
mesos
7+
nifi-minifi-cpp
8+
pulsar-client-cpp
9+
singa
10+
trafficserver

resources/repositories/java_repos.txt

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -10,41 +10,31 @@ activemq
1010
activemq-activeio
1111
activemq-artemis
1212
activemq-cli-tools
13-
activemq-nms-openwire-generator
1413
activemq-openwire
1514
airavata
16-
allura
1715
ambari
1816
ant
19-
ant-antlibs-compress
2017
ant-antlibs-dotnet
21-
ant-antlibs-props
2218
ant-antlibs-vss
2319
ant-easyant-tasks
2420
ant-ivy
2521
ant-ivyde
2622
apex-core
27-
archiva
2823
archiva-redback-components
2924
archiva-redback-core
3025
archiva-sandbox
3126
aries
32-
aries-blueprint
3327
aries-cdi
3428
aries-containers
3529
aries-jax-rs-whiteboard
3630
aries-jpa
3731
aries-rsa
38-
aries-spifly
3932
aries-tx-control
40-
aries-util
4133
arrow
42-
asterixdb
4334
asterixdb-bad
4435
asterixdb-hyracks
4536
atlas
4637
avro
47-
axis2-java
4838
bigtop
4939
bigtop-manager
5040
bookkeeper
@@ -68,7 +58,6 @@ commons-chain
6858
commons-cli
6959
commons-codec
7060
commons-collections
71-
commons-collections4
7261
commons-compress
7362
commons-configuration
7463
commons-csv
@@ -100,13 +89,11 @@ commons-validator
10089
commons-vfs
10190
commons-weaver
10291
continuum
103-
cordova-plugin-compat
10492
creadur-rat
10593
crunch
10694
ctakes
10795
curator
10896
cxf
109-
cxf-build-utils
11097
cxf-dosgi
11198
cxf-fediz
11299
cxf-xjc-utils
@@ -116,7 +103,6 @@ datasketches-memory
116103
datasketches-pig
117104
datasketches-vector
118105
db-jdo
119-
db-torque
120106
ddlutils
121107
deltaspike
122108
directmemory-lightning
@@ -127,17 +113,12 @@ directory-ldap-api
127113
directory-mavibot
128114
directory-samples
129115
directory-server
130-
directory-sms
131116
directory-studio
132-
drill
133-
druid
134-
dubbo
135117
dubbo-rpc-jsonrpc
136118
empire-db
137119
felix
138120
fineract
139121
flex-blazeds
140-
flink
141122
flume
142123
fluo
143124
fluo-recipes
@@ -159,7 +140,6 @@ gobblin
159140
gora
160141
groovy
161142
guacamole-client
162-
hadoop
163143
hawq
164144
hbase
165145
hbase-operator-tools
@@ -174,9 +154,6 @@ hudi
174154
ignite
175155
incubator-batchee
176156
incubator-hivemall
177-
incubator-kie-benchmarks
178-
incubator-kie-drools
179-
incubator-kie-optaplanner
180157
incubator-optiq-linq4j
181158
incubator-retired-gossip
182159
incubator-retired-hdt
@@ -204,11 +181,9 @@ incubator-tuweni
204181
incubator-twill
205182
isis
206183
jackrabbit
207-
jackrabbit-filevault
208184
jackrabbit-filevault-package-maven-plugin
209185
jackrabbit-oak
210186
jackrabbit-ocm
211-
james
212187
james-jdkim
213188
james-jsieve
214189
james-jspf
@@ -222,11 +197,9 @@ jspwiki
222197
kafka
223198
kalumet
224199
kandula
225-
karaf
226200
karaf-boot
227201
karaf-cellar
228202
karaf-decanter
229-
kylin
230203
lens
231204
logging-flume
232205
logging-log4j-boot
@@ -237,7 +210,6 @@ manifoldcf
237210
manifoldcf-integration-solr-3.x
238211
manifoldcf-integration-solr-4.x
239212
marmotta
240-
maven
241213
maven-2
242214
maven-ant-tasks
243215
maven-app-engine
@@ -258,11 +230,8 @@ metamodel
258230
metamodel-membrane
259231
mina
260232
mina-sshd
261-
myfaces-extcdi
262-
myfaces-extval
263233
myfaces-scripting
264234
netbeans-html4j
265-
nifi
266235
nifi-maven
267236
nifi-minifi
268237
nutch
@@ -313,53 +282,42 @@ sandesha
313282
sanselan
314283
santuario-java
315284
savan
316-
seatunnel
317285
sentry
318286
servicecomb-java-chassis
319287
servicemix
320288
servicemix-archetypes
321-
servicemix-bundles
322289
servicemix-components
323290
servicemix-maven-plugins
324291
servicemix-specs
325292
servicemix-utils
326293
servicemix3
327-
servicemix4-bundles
328294
servicemix4-features
329295
servicemix4-kernel
330296
servicemix4-nmr
331297
servicemix4-specs
332298
shardingsphere-elasticjob
333-
shardingsphere-example
334299
shiro
335300
sis
336-
skywalking
337-
sling
338301
sling-org-apache-sling-adapter
339302
sling-org-apache-sling-api
340303
sling-org-apache-sling-auth-core
341304
sling-org-apache-sling-auth-form
342305
sling-org-apache-sling-bundleresource-impl
343-
sling-org-apache-sling-caconfig-api
344306
sling-org-apache-sling-caconfig-impl
345307
sling-org-apache-sling-caconfig-integration-tests
346308
sling-org-apache-sling-caconfig-spi
347309
sling-org-apache-sling-commons-classloader
348310
sling-org-apache-sling-commons-compiler
349311
sling-org-apache-sling-commons-contentdetection
350-
sling-org-apache-sling-commons-johnzon
351312
sling-org-apache-sling-commons-log
352313
sling-org-apache-sling-commons-logservice
353-
sling-org-apache-sling-commons-messaging
354314
sling-org-apache-sling-commons-messaging-mail
355315
sling-org-apache-sling-commons-metrics
356316
sling-org-apache-sling-commons-metrics-rrd4j
357317
sling-org-apache-sling-commons-mime
358318
sling-org-apache-sling-commons-osgi
359319
sling-org-apache-sling-commons-scheduler
360-
sling-org-apache-sling-commons-threaddump
361320
sling-org-apache-sling-commons-threads
362-
sling-org-apache-sling-discovery-api
363321
sling-org-apache-sling-discovery-base
364322
sling-org-apache-sling-discovery-commons
365323
sling-org-apache-sling-discovery-impl
@@ -371,33 +329,26 @@ sling-org-apache-sling-event-dea
371329
solr
372330
spark
373331
sqoop
374-
stanbol
375332
storm
376333
struts
377-
struts-annotations
378-
struts-extras
379334
struts-sandbox
380335
struts1
381336
subversion
382337
synapse
383338
syncope
384-
tapestry
385339
tez
386-
tika
387340
tiles-autotag
388341
tiles-request
389342
tomcat
390343
tomcat-maven-plugin
391344
tomcat55
392345
tomcat80
393346
tomee
394-
turbine
395347
tuscany-das
396348
tuscany-sca-1.x
397349
tuscany-sca-2.x
398350
tuscany-sdo
399351
twill
400-
uima
401352
uima-addons
402353
uima-sandbox
403354
uima-uimafit
Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,2 @@
1-
spark
2-
beam
3-
kafka
4-
flink
5-
druid
6-
pulsar
7-
nifi
8-
cassandra
9-
hadoop
10-
hbase
11-
hive
12-
storm
13-
superset
14-
airflow
15-
mahout
1+
logging-log4j-kotlin
2+
incubator-retired-amaterasu
Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,9 @@
11
airflow
2-
beam
3-
libcloud
4-
mxnet
5-
spamassassin
6-
superset
7-
zookeeper
8-
hadoop
9-
hbase
10-
hive
11-
kafka
12-
spark
13-
storm
14-
flink
15-
druid
16-
superset
17-
pulsar
18-
nifi
19-
cassandra
20-
mahout
2+
allura
3+
openserverless-operator
4+
iceberg-python
5+
airflow-client-python
6+
skywalking-python
7+
ignite-python-thin-client
8+
pulsar-client-python
9+
hunter

resources/repositories/rust_repos.txt

Lines changed: 0 additions & 2 deletions
This file was deleted.

src/infrastructure/repository_utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,12 @@ def read_repositories(language: str) -> List[Repository]:
3131

3232
return [apache_repo_from_name(repo_name) for repo_name in repositories]
3333

34-
def read_commits(repository_url: str, file_extension: str, final_date: Optional[datetime] = None) -> Generator[Commit, None, None]:
34+
def read_commits(repository_url: str, file_extensions: List[str], final_date: Optional[datetime] = None) -> Generator[Commit, None, None]:
3535
"""
3636
Reads commits from a repository using PyDriller.
3737
3838
@param: repository_url: The URL of the repository.
39-
@param: file_extension: The file extension to search for.
39+
@param: file_extensions: The file extensions to search for.
4040
@param: final_date: Date to read commits up until from the given repository.
4141
4242
@return: A generator of Commit objects.
@@ -45,7 +45,7 @@ def read_commits(repository_url: str, file_extension: str, final_date: Optional[
4545
raise ValueError("Final date must be in the past.")
4646
to_date = final_date if final_date else datetime.now()
4747

48-
return DrillerRepo(repository_url, only_modifications_with_file_types=[file_extension], to=to_date).traverse_commits()
48+
return DrillerRepo(repository_url, only_modifications_with_file_types=file_extensions, to=to_date).traverse_commits()
4949

5050
def repo_from_url(repo_url: str):
5151
repo_name = re.search(r"github\.com/[^/]+/([^/.]+)", repo_url).group(1) if re.search(r"github\.com/[^/]+/([^/.]+)", repo_url) else None

src/mining/commit_processing.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from collections import defaultdict
22
from src.mining import commit_retrieval as retrieval
3-
from src.models.LanguageFileHandler import LanguageFileHandler
3+
from src.models.file_handlers.LanguageFileHandler import LanguageFileHandler
44

55
def gather_commits_and_tests(repo_name, file_handler: LanguageFileHandler):
66
"""
@@ -82,4 +82,4 @@ def calculate_average_commit_size(commits, test_files):
8282
counter += 1
8383
complete_indexes.append(test_file[0])
8484

85-
return round(total/counter, 1)
85+
return round(total/counter, 1) if counter != 0 else 0

src/mining/commit_retrieval.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@
44
from src.infrastructure import repository_utils as repository_utils
55
from src.infrastructure import file_utils as file_utils
66
from src.infrastructure import serialize as serializer
7-
from src.models.LanguageFileHandler import LanguageFileHandler
7+
from src.models.file_handlers.LanguageFileHandler import LanguageFileHandler
88
from src.models.CustomCommit import CustomCommit
99
from src.models.Repository import Repository
1010

1111
def _retrieve_files(modified_files, file_handler: LanguageFileHandler):
1212
files = []
1313

1414
for file in modified_files:
15-
if file_handler.file_extension in file.filename:
15+
if any(file_extension in file.filename for file_extension in file_handler.file_extensions):
1616
files.append(file.filename)
1717

1818
return files
@@ -21,7 +21,7 @@ def _retrieve_commits(repo_url, file_handler: LanguageFileHandler, final_date =
2121
commits = []
2222

2323
try:
24-
for commit in repository_utils.read_commits(repo_url, file_handler.file_extension, final_date):
24+
for commit in repository_utils.read_commits(repo_url, file_handler.file_extensions, final_date):
2525
files = _retrieve_files(commit.modified_files, file_handler)
2626
commits.append(CustomCommit(commit.hash, files, commit.author, commit.author_date))
2727
except Exception as e:

0 commit comments

Comments
 (0)