Skip to content

Commit cf90038

Browse files
committed
Put more file type metadata into filetypes.py
1 parent 7b5435e commit cf90038

3 files changed

Lines changed: 64 additions & 4 deletions

File tree

oshminer/GitHub.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
# SPDX-License-Identifier: AGPL-3.0-or-later
55

66
# Python Standard Library imports
7-
import csv
87
import os
98
import string
109
import sys
@@ -140,6 +139,14 @@ async def get_file_list(project: dict, session) -> list:
140139
#
141140

142141
async def get_files_editability(project: dict, session) -> dict:
142+
"""
143+
Return a `dict` of files in this repository and an assessment of their
144+
editability based on the `osh-file-types` lists from:
145+
https://gitlab.com/OSEGermany/osh-file-types/
146+
"""
147+
148+
filetypes.osh_file_types
149+
143150
# Placeholder result
144151
result: dict = {
145152
"files_editability": "Not implemented for GitHub yet."

oshminer/Wikifactory.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
# SPDX-License-Identifier: AGPL-3.0-or-later
55

66
# Python Standard Library imports
7-
import csv
87
from datetime import datetime
98
import json
109
import os

oshminer/filetypes.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
# SPDX-FileCopyrightText: 2022 Pen-Yuan Hsing
44
# SPDX-License-Identifier: AGPL-3.0-or-later
55

6-
# Categorised list of file extensions
6+
# 1. Categorised list of file extensions
7+
#
78
# Each category is a `list` and each extension should be in only one list.
89
#
910
# Most extensions are from this paper:
@@ -12,6 +13,20 @@
1213
# mining. Design Science, 4(e19). https://doi.org/10.1017/dsj.2018.15
1314
#
1415
# Extensions that were changed, or that do not come from that paper, are marked
# in comments (e.g. `# added`).
16+
#
17+
# 2. Additional file extension information
18+
#
19+
# `osh_file_types` is derived from the open source hardware file extension
20+
# metadata published by Open Source Ecology Germany in its CAD and PCB file
21+
# type lists:
22+
# https://gitlab.com/OSEGermany/osh-file-types/
23+
24+
# Python Standard Library imports
25+
import csv
26+
27+
#
28+
# 1. Categorised list of file extensions
29+
#
1530

1631
# Electronic CAD
1732
ecad: list = [
@@ -132,4 +147,43 @@
132147
"tex", # added
133148
"txt",
134149
"yaml" # added
135-
]
150+
]
151+
152+
#
153+
# 2. Additional file extension information
154+
#
155+
156+
# Column mappings to shorter names
157+
column_mappings: dict = {
158+
"File extension": "extension",
159+
"File format [open|proprietary|unknown]": "format",
160+
"Encoding [text|binary|both|unknown]": "encoding",
161+
"Category [source|export]": "category"
162+
}
163+
164+
# Read list of CAD files
165+
with open("oshminer/osh-file-types/file_extension_formats-cad.csv", newline = '') as cad_formats_file:
166+
cad_reader: csv.DictReader = csv.DictReader(cad_formats_file, delimiter=',')
167+
# Get column names, see:
168+
# https://stackoverflow.com/a/28837325/186904
169+
# https://www.geeksforgeeks.org/get-column-names-from-csv-using-python/
170+
cad_files: list = list(cad_reader)
171+
# Rename `dict` keys for each entry for easier handling, see:
172+
# https://stackoverflow.com/a/16475444/186904
173+
for column_name in list(column_mappings.keys()):
174+
cad_files: list = [{column_mappings[column_name] if k == column_name else k:v for k,v in r.items()} for r in cad_files]
175+
176+
# Read list of PCB files
177+
with open("oshminer/osh-file-types/file_extension_formats-pcb.csv", newline = '') as pcb_formats_file:
178+
pcb_reader: csv.DictReader = csv.DictReader(pcb_formats_file, delimiter=',')
179+
# Get column names, see:
180+
# https://stackoverflow.com/a/28837325/186904
181+
# https://www.geeksforgeeks.org/get-column-names-from-csv-using-python/
182+
pcb_files: list = list(pcb_reader)
183+
# Rename `dict` keys for each entry for easier handling, see:
184+
# https://stackoverflow.com/a/16475444/186904
185+
for column_name in list(column_mappings.keys()):
186+
pcb_files: list = [{column_mappings[column_name] if k == column_name else k:v for k,v in r.items()} for r in pcb_files]
187+
188+
# Combine CAD and PCB lists
189+
osh_file_types: list = cad_files + pcb_files

0 commit comments

Comments
 (0)