Skip to content

Commit bf68dbf

Browse files
authored
Merge pull request #19 from ai-forever/dev
New features
2 parents a7d408e + 302944f commit bf68dbf

6 files changed

Lines changed: 101 additions & 5 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ data/
33
tests/
44
pipelines/
55

6+
# pycharm
7+
.idea/
8+
69
# Byte-compiled / optimized / DLL files
710
__pycache__/
811
*.py[cod]

DPF/filesystems/filesystem.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,33 @@
22
import os
33
import io
44
import tarfile
5-
from typing import Union, List, Tuple, Iterable
5+
import datetime
6+
from typing import Union, List, Dict, Optional, Tuple, Iterable
67
import pandas as pd
78

89

10+
class FileData:
    """Represents a file or directory together with its metadata.

    Parameters
    ----------
    path: str
        Path to the file or directory
    type: str
        Kind of the entry, either 'directory' or 'file'
    last_modified: Optional[datetime.datetime]
        Time of the last modification, if known
    file_size: Optional[int]
        Size of the file, if known
    """

    def __init__(
        self,
        path: str,
        type: str,
        last_modified: Optional[datetime.datetime] = None,
        file_size: Optional[int] = None
    ):
        self.path = path
        # Trailing slashes are dropped first so that a directory path such as
        # "a/b/" yields the name "b" instead of an empty string.
        self.name = os.path.basename(self.path.rstrip('/'))
        assert type in {'directory', 'file'}, \
            "param 'type' must be one of {'directory', 'file'}, got "+str(type)
        self.type = type
        self.last_modified = last_modified
        self.file_size = file_size

    def __repr__(self) -> str:
        # The closing quote belongs right after the path; only the path is
        # rendered as a quoted string.
        return (
            f'File(path="{self.path}", size={self.file_size}, '
            f'last_modified={self.last_modified})'
        )
30+
31+
932
class FileSystem(ABC):
1033
"""
1134
Abstract class for all filesystems
@@ -153,6 +176,22 @@ def listdir_with_ext(
153176
if f.endswith(ext)
154177
]
155178

179+
@abstractmethod
def listdir_meta(self, folder_path: str) -> List[FileData]:
    """
    Lists the contents of a folder, attaching meta information to every entry

    Parameters
    ----------
    folder_path: str
        Path to the folder whose contents should be listed

    Returns
    -------
    List[FileData]
        One FileData object per entry found in the folder
    """
194+
156195
@abstractmethod
157196
def mkdir(self, folder_path: str) -> None:
158197
"""

DPF/filesystems/localfilesystem.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import os
22
import io
3+
from datetime import datetime
34
from typing import Union, List, Optional, Tuple, Iterable
45

5-
from .filesystem import FileSystem
6+
from .filesystem import FileSystem, FileData
67

78

89
class LocalFileSystem(FileSystem):
@@ -44,6 +45,18 @@ def listdir(
4445
files = [folder_path + f for f in files]
4546
return files
4647

48+
def listdir_meta(self, folder_path: str) -> List[FileData]:
    """
    Returns the contents of a local folder with meta information

    Parameters
    ----------
    folder_path: str
        Path to folder

    Returns
    -------
    List[FileData]
        One FileData per directory entry, carrying its path, type
        ('directory' or 'file'), modification time and size
    """
    folder_path = folder_path.rstrip("/") + "/"
    results = []
    # The context manager releases the directory handle even when reading
    # one of the entries raises.
    with os.scandir(folder_path) as entries:
        for entry in entries:
            type_ = 'directory' if entry.is_dir() else 'file'
            try:
                stats = entry.stat()
            except FileNotFoundError:
                # stat() follows symlinks, so a dangling link would abort the
                # whole listing; describe the link itself instead.
                stats = entry.stat(follow_symlinks=False)
            last_modified = datetime.fromtimestamp(stats.st_mtime)
            results.append(
                FileData(entry.path, type_, last_modified, stats.st_size)
            )
    return results
59+
4760
def mkdir(self, folder_path: str) -> None:
4861
folder_path = folder_path.rstrip("/") + "/"
4962
os.makedirs(folder_path, exist_ok=True)

DPF/filesystems/s3filesystem.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from typing import Union, List, Optional, Tuple, Iterable
44
import fsspec
55

6-
from .filesystem import FileSystem
6+
from .filesystem import FileSystem, FileData
77

88

99
class S3FileSystem(FileSystem):
@@ -63,6 +63,24 @@ def listdir(
6363
files = ["s3://" + f for f in files]
6464
return files
6565

66+
def listdir_meta(self, folder_path: str) -> List[FileData]:
    """
    Returns the contents of an S3 folder with meta information

    Parameters
    ----------
    folder_path: str
        Path to folder, with or without the leading "s3://" scheme

    Returns
    -------
    List[FileData]
        One FileData per listed entry; paths are returned with the "s3://"
        prefix. The size is only filled in for entries of type 'file'.
    """
    # str.lstrip("s3://") removes any leading run of the characters
    # 's', '3', ':' and '/', which corrupts bucket names such as "s3data".
    # Remove the scheme as an exact prefix instead.
    scheme = "s3://"
    if folder_path.startswith(scheme):
        folder_path = folder_path[len(scheme):]
    folder_path = folder_path.lstrip("/").rstrip("/") + "/"

    s3 = fsspec.filesystem("s3", **self.storage_options)
    files_data = s3.ls(folder_path, detail=True)

    results = []
    for file_data in files_data:
        # Skip the entry that stands for the listed folder itself.
        if file_data['Key'] == folder_path:
            continue
        path = "s3://"+file_data['Key']
        filetype = file_data['type']
        last_modified = file_data.get('LastModified', None)
        size = file_data.get('Size', None) if filetype == 'file' else None
        results.append(FileData(path, filetype, last_modified, size))
    return results
83+
6684
def mkdir(self, folder_path: str) -> None:
6785
folder_path = folder_path.rstrip("/") + "/"
6886
s3 = fsspec.filesystem("s3", **self.storage_options)
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
torch
22
torchvision
33
autokeras
4-
git+https://github.com/openai/CLIP.git
4+
CLIP @ git+https://github.com/openai/CLIP.git
55
huggingface_hub
6-
git+https://github.com/boomb0om/CRAFT-text-detection/
6+
CRAFT @ git+https://github.com/boomb0om/CRAFT-text-detection/

setup.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import os
2+
from setuptools import setup, find_packages
3+
4+
5+
def get_requirements(filename: str = 'requirements.txt'):
    """
    Reads requirement specifiers from a requirements file.

    Parameters
    ----------
    filename: str
        Path to the requirements file, resolved relative to the directory
        that contains this script

    Returns
    -------
    list of str
        Requirement lines with surrounding whitespace removed; blank lines
        and lines starting with '#' are skipped
    """
    here = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(here, filename), 'r', encoding='utf-8') as f:
        stripped = (line.strip() for line in f)
        # Empty strings and comments are not valid requirement specifiers,
        # so they must not reach install_requires / extras_require.
        return [line for line in stripped if line and not line.startswith('#')]
10+
11+
12+
# Values are computed up front, in the same order the setup() arguments
# would evaluate them, so the call itself is plain metadata.
_packages = find_packages(include=['DPF*'])
_install_requires = get_requirements()
_extras_require = {
    # Optional dependency group read from requirements/requirements_filters.txt
    "filters": get_requirements(os.path.join('requirements', 'requirements_filters.txt')),
}

setup(
    name="DPF",
    version="0.0.9",
    description="",
    author="Igor Pavlov, Mikhail Shoytov and Anastasia Lysenko",
    url='https://github.com/ai-forever/DataProcessingFramework',
    packages=_packages,
    install_requires=_install_requires,
    extras_require=_extras_require,
)

0 commit comments

Comments
 (0)