一.加载csv文件
datasets.dataset_dict.DatasetDict.from_csv(
# 常用的参数名字
path_or_paths: Dict[str, PathLike],  # 路径和多个路径的名字
features: Optional[Features] = None,  # 特征
cache_dir: str = None,  # 缓存路径
keep_in_memory: bool = False,
**kwargs,
)
from datasets.dataset_dict import DatasetDict
train = DatasetDict.from_csv({'train': 'data_squad/SQuAD/train.csv'})
二.加载json文件
datasets.DatasetDict.from_json(
path_or_paths: Dict[str, PathLike],
features: Optional[Features] = None,
cache_dir: str = None,
keep_in_memory: bool = False,
**kwargs,
)
from datasets.dataset_dict import DatasetDict
train = DatasetDict.from_json({'train': 'data_squad/SQuAD/train.json'})
三.加载text文件
datasets.DatasetDict.from_text(
path_or_paths: Dict[str, PathLike],
features: Optional[Features] = None,
cache_dir: str = None,
keep_in_memory: bool = False,
**kwargs,
)
from datasets.dataset_dict import DatasetDict
train = DatasetDict.from_text({'train': 'data_squad/SQuAD/train.text'})
四.加载parquet文件
datasets.DatasetDict.from_parquet(
path_or_paths: Dict[str, PathLike],
features: Optional[Features] = None,
cache_dir: str = None,
keep_in_memory: bool = False,
columns: Optional[List[str]] = None,
**kwargs,
)
from datasets.dataset_dict import DatasetDict
train = DatasetDict.from_parquet({'train': 'data_squad/SQuAD/train.parquet'})