Is there anyone interested in this competition? It's not fun to compete alone; it even discourages me from taking care of errors!
---------------------------------------------------------------------------
ClientResponseError Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/fsspec/implementations/http.py in _info(self, url, **kwargs)
415 **self.kwargs,
--> 416 **kwargs,
417 )
/opt/conda/lib/python3.7/site-packages/fsspec/implementations/http.py in _file_info(url, session, size_policy, **kwargs)
826 async with r:
--> 827 r.raise_for_status()
828
/opt/conda/lib/python3.7/site-packages/aiohttp/client_reqrep.py in raise_for_status(self)
1008 message=self.reason,
-> 1009 headers=self.headers,
1010 )
ClientResponseError: 403, message='Forbidden', url=URL('https://cdn-lfs.huggingface.co/repos/22/8b/228be01c8d9b89b1323448749de3877ee990dd02ae2fead8bbd49ba6d73d075a/78809276165a7fdad122ffd2c8d7fef0495570c10b409385f340f41f9d5ba528?response-content-disposition=attachment%3B+filename*%3DUTF-8''train.csv%3B+filename%3D%22train.csv%22%3B&response-content-type=text/csv&Expires=1677453692&Policy=eyJTdGF0ZW1lbnQiOlt7IlJlc291cmNlIjoiaHR0cHM6Ly9jZG4tbGZzLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzIyLzhiLzIyOGJlMDFjOGQ5Yjg5YjEzMjM0NDg3NDlkZTM4NzdlZTk5MGRkMDJhZTJmZWFkOGJiZDQ5YmE2ZDczZDA3NWEvNzg4MDkyNzYxNjVhN2ZkYWQxMjJmZmQyYzhkN2ZlZjA0OTU1NzBjMTBiNDA5Mzg1ZjM0MGY0MWY5ZDViYTUyOD9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSomcmVzcG9uc2UtY29udGVudC10eXBlPXRleHQlMkZjc3YiLCJDb25kaXRpb24iOnsiRGF0ZUxlc3NUaGFuIjp7IkFXUzpFcG9jaFRpbWUiOjE2Nzc0NTM2OTJ9fX1dfQ__&Signature=eT2o4KbH-JUXIiIPyUep0tfm9kpDvtDSzIwyyzk6xyyjtaaDj0sDOhUcUlK22iZ6KVouDORNx2zNhOqM4unBj8LcPJeokBp92ARvbqaCkRr0J8NprKkTlL63rXKzu-k~gaDzLw2SgVdR7mP1DGBRe1DSm3LN626wXJVxLxVW0BMf7p6Q6-bn4trmHXnmLb051V0IwnHm8c0nEDjlfkoit2bP6rNnME2ZhYGKsh5nQqtC4-p6-bMfWpuGDGAw0Gs20fxQMgRvuvH14iEULBfoy7F2OVbRdLqUJ3RWoFpq6I9Prk-rdlqkNVyX8~VYy2C-VR73V-fr8iwhkgN7H6mHCw__&Key-Pair-Id=KVTP0A1DKRTAX')
The above exception was the direct cause of the following exception:
FileNotFoundError Traceback (most recent call last)
/tmp/ipykernel_23/877569627.py in <module>
----> 1 k = next(iter(dataset["train"]))
/opt/conda/lib/python3.7/site-packages/datasets/iterable_dataset.py in __iter__(self)
495
496 def __iter__(self):
--> 497 for key, example in self._iter():
498 if self.features:
499 # we encode the example for ClassLabel feature types for example
/opt/conda/lib/python3.7/site-packages/datasets/iterable_dataset.py in _iter(self)
492 else:
493 ex_iterable = self._ex_iterable
--> 494 yield from ex_iterable
495
496 def __iter__(self):
/opt/conda/lib/python3.7/site-packages/datasets/iterable_dataset.py in __iter__(self)
85
86 def __iter__(self):
---> 87 yield from self.generate_examples_fn(**self.kwargs)
88
89 def shuffle_data_sources(self, generator: np.random.Generator) -> "ExamplesIterable":
~/.cache/huggingface/modules/datasets_modules/datasets/tobiolatunji--afrispeech-200/0994341a78a520144afc15e99c95aacfe056e9833f4becf9efa34969c3f81c5e/afrispeech-200.py in _generate_examples(self, local_extracted_archive_paths, archives, meta_path)
236 data_fields = [key for key in self._info().features.keys() if key not in ["audio", "path"]]
237 metadata = {}
--> 238 with open(meta_path, "r", encoding="utf-8") as f:
239 reader = csv.DictReader(f)
240 for row in tqdm(reader, desc="Reading metadata..."):
/opt/conda/lib/python3.7/site-packages/datasets/streaming.py in wrapper(*args, **kwargs)
64 @wraps(function)
65 def wrapper(*args, **kwargs):
---> 66 return function(*args, use_auth_token=use_auth_token, **kwargs)
67
68 wrapper._decorator_name_ = "wrap_auth"
/opt/conda/lib/python3.7/site-packages/datasets/utils/streaming_download_manager.py in xopen(file, mode, use_auth_token, *args, **kwargs)
420 kwargs = {**kwargs, **new_kwargs}
421 try:
--> 422 file_obj = fsspec.open(file, mode=mode, *args, **kwargs).open()
423 except ValueError as e:
424 if str(e) == "Cannot seek streaming HTTP file":
/opt/conda/lib/python3.7/site-packages/fsspec/core.py in open(self)
132 during the life of the file-like it generates.
133 """
--> 134 return self.__enter__()
135
136 def close(self):
/opt/conda/lib/python3.7/site-packages/fsspec/core.py in __enter__(self)
100 mode = self.mode.replace("t", "").replace("b", "") + "b"
101
--> 102 f = self.fs.open(self.path, mode=mode)
103
104 self.fobjects = [f]
/opt/conda/lib/python3.7/site-packages/fsspec/spec.py in open(self, path, mode, block_size, cache_options, compression, **kwargs)
1139 autocommit=ac,
1140 cache_options=cache_options,
-> 1141 **kwargs,
1142 )
1143 if compression is not None:
/opt/conda/lib/python3.7/site-packages/fsspec/implementations/http.py in _open(self, path, mode, block_size, autocommit, cache_type, cache_options, size, **kwargs)
348 kw["asynchronous"] = self.asynchronous
349 kw.update(kwargs)
--> 350 size = size or self.info(path, **kwargs)["size"]
351 session = sync(self.loop, self.set_session)
352 if block_size and size:
/opt/conda/lib/python3.7/site-packages/fsspec/asyn.py in wrapper(*args, **kwargs)
112 def wrapper(*args, **kwargs):
113 self = obj or args[0]
--> 114 return sync(self.loop, func, *args, **kwargs)
115
116 return wrapper
/opt/conda/lib/python3.7/site-packages/fsspec/asyn.py in sync(loop, func, timeout, *args, **kwargs)
97 raise FSTimeoutError from return_result
98 elif isinstance(return_result, BaseException):
---> 99 raise return_result
100 else:
101 return return_result
/opt/conda/lib/python3.7/site-packages/fsspec/asyn.py in _runner(event, coro, result, timeout)
52 coro = asyncio.wait_for(coro, timeout=timeout)
53 try:
---> 54 result[0] = await coro
55 except Exception as ex:
56 result[0] = ex
/opt/conda/lib/python3.7/site-packages/fsspec/implementations/http.py in _info(self, url, **kwargs)
422 if policy == "get":
423 # If get failed, then raise a FileNotFoundError
--> 424 raise FileNotFoundError(url) from exc
425 logger.debug(str(exc))
426
FileNotFoundError: https://huggingface.co/datasets/tobiolatunji/afrispeech-200/resolve/main/transcripts/train.csv
wanna form a team?
maybe when other competitors join this competition, but for now I think I will concentrate on the other competitions
ok sounds fair
Apologies for the data access issues. We're working with Hugging Face to resolve this. Meanwhile, you can download the files directly from Google Drive via the data section of the competition.
Thanks, intron. I didn't download the data because I don't have space either locally or on my drive hh. I also apologize for being stressed; I remember doing something similar and obtaining 0.29 WER on the train set and 0.13 WER on the test set, but now I obtain 84 WER on the train set and 2.9 WER on the test set.
What data is your submission from? I have been trying to submit my solution, but it gives an error about several missing IDs. I have checked, and the files being referred to are in the test dataset, which is yet to be made available. PS: I am trying to submit predictions from the Afrispeech-dev/dev files.
Can you share the stack trace of the error you are getting?
It also seems this problem only affects streaming mode
We have confirmed with Hugging Face that this problem is on their side, and they have been working hard to fix it. Please check now: the issue has been fixed, and dataset loading in streaming mode works fine.
Merci intron