Keras 3, Kaggle, CLI, Streaming #295

Draft
wants to merge 170 commits into base: main

Changes from all commits (170 commits)
232a80c
feat(streaming): training, inference, gradient accumulation
nglehuy Jun 15, 2024
9ea9b87
fix: remove ds2 dropout on conv module
nglehuy Jun 18, 2024
7c21c1d
fix: add sync batch norm, remove wrong bn in ds2
nglehuy Jun 22, 2024
4e0e8f5
fix: only wrap tf.function in jit compile
nglehuy Jun 24, 2024
de38407
fix: use autograph do_not_convert for batchnorm sync to work
nglehuy Jun 24, 2024
43d6054
chore: config
nglehuy Jul 2, 2024
d55fd40
fix: update train/test step
nglehuy Jul 2, 2024
2f502bb
fix: nan to num
nglehuy Jul 2, 2024
7441c95
fix: update compute mask ds2
nglehuy Jul 2, 2024
f2241cf
fix: nan to num
nglehuy Jul 2, 2024
ebb6930
fix: ctc loss
nglehuy Jul 2, 2024
305ddab
fix: update train step
nglehuy Jul 3, 2024
8268afe
chore: config
nglehuy Jul 3, 2024
c9e4d38
chore: config
nglehuy Jul 3, 2024
9b46cbe
fix: ctc
nglehuy Jul 3, 2024
b5bfe92
fix: add custom batch norm to avoid tf.cond
nglehuy Jul 4, 2024
dc08e6a
fix: env utils
nglehuy Jul 4, 2024
cf37206
fix: log batch that cause invalid loss
nglehuy Jul 4, 2024
4596609
chore: buffer size
nglehuy Jul 4, 2024
ba9d6b2
fix: handle unknown dataset size with no metadata provided
nglehuy Jul 6, 2024
fe594ad
chore: add option use loss scale
nglehuy Jul 6, 2024
7aed458
fix: support log debug
nglehuy Jul 6, 2024
4330dec
fix: update gradient accumulation
nglehuy Jul 10, 2024
522f080
fix: ga
nglehuy Jul 14, 2024
90cabc2
feat: tf2.16 with keras 3
nglehuy Jul 14, 2024
8285f6d
fix: ga
nglehuy Jul 14, 2024
fd446d6
Merge branch 'tf2.16' into feat-streaming
nglehuy Jul 15, 2024
5037896
feat: fix layers, models to tf2.16 with keras 3
nglehuy Jul 15, 2024
a853eba
feat: update models to compatible with keras 3
nglehuy Jul 21, 2024
f8a7b91
fix: loss compute using add_loss, loss tracking
nglehuy Jul 28, 2024
03f0d60
fix: output shapes of models to log to summary
nglehuy Jul 28, 2024
8b0ed02
fix: contextnet
nglehuy Jul 28, 2024
77baaa5
fix: ds2
nglehuy Jul 28, 2024
3768878
fix: jasper
nglehuy Jul 28, 2024
f3cb239
fix: rnnt
nglehuy Jul 28, 2024
1d7e3a6
fix: transformer
nglehuy Jul 28, 2024
7be3eda
fix: update deps
nglehuy Jul 29, 2024
9b03b31
fix: requirements
nglehuy Aug 25, 2024
401180b
fix: super init
nglehuy Aug 25, 2024
786f5d4
fix: update regularizers
nglehuy Aug 25, 2024
a9d1733
fix: update regularizers
nglehuy Aug 25, 2024
689b366
fix: print shapes
nglehuy Nov 24, 2024
4e75c0f
fix: conformer ctc
nglehuy Nov 24, 2024
82d91c8
fix: add ctc tpu impl
nglehuy Nov 25, 2024
c915be3
fix: ctc tpu impl
nglehuy Nov 25, 2024
dda33b7
fix: save weights, tpu connect
nglehuy Nov 28, 2024
c667984
fix: save weights, tpu connect
nglehuy Nov 28, 2024
dc77b84
fix: update req
nglehuy Dec 3, 2024
d455ae1
fix: update req
nglehuy Dec 3, 2024
33394a2
fix: update req
nglehuy Dec 3, 2024
35160ce
fix: update req
nglehuy Dec 3, 2024
6ffb3b8
fix: update req
nglehuy Dec 3, 2024
bb732a7
fix: update req
nglehuy Dec 4, 2024
9179425
fix: strategy scope
nglehuy Dec 4, 2024
ace3887
fix: requirements
nglehuy Dec 4, 2024
67a8470
fix: update savings
nglehuy Dec 7, 2024
9824819
feat: bundle scripts inside package
nglehuy Dec 29, 2024
05b068b
feat: introduce chunk-wise masking for mha layer
nglehuy Dec 31, 2024
1edf16a
feat: introduce chunk-wise masking to conformer & transformer
nglehuy Dec 31, 2024
6338f55
chore: update install script
nglehuy Jan 1, 2025
e844f77
chore: add conformer small streaming
nglehuy Jan 1, 2025
0543c31
chore: add conformer small streaming
nglehuy Jan 1, 2025
a2e2022
fix: use history size instead of memory length
nglehuy Jan 1, 2025
4824929
chore: update logging
nglehuy Jan 1, 2025
eca664c
fix: streaming masking mha
nglehuy Jan 1, 2025
a302962
fix: conformer ctc configs
nglehuy Jan 7, 2025
ab83d87
feat: add kaggle backup and restore callback
nglehuy Jan 11, 2025
52de4c0
fix: support flash attention, update deps
nglehuy Jan 12, 2025
aaa06a5
chore: add conformer-ctc-small-streaming-kaggle
nglehuy Jan 12, 2025
2fd4f2b
fix: restore from kaggle model
nglehuy Jan 12, 2025
6e5c3b9
fix: restore from kaggle model
nglehuy Jan 12, 2025
f62fdf9
fix: ignore backup kaggle when nan loss occurs
nglehuy Jan 12, 2025
2947160
fix: only use tqdm when needed
nglehuy Jan 12, 2025
c532d5c
fix: deps
nglehuy Jan 23, 2025
0e5e826
fix: support static shape
nglehuy Jan 24, 2025
eb55a4b
fix: mha streaming mask
nglehuy Jan 24, 2025
12fbb85
fix: feature extraction mixed precision, configs
nglehuy Jan 25, 2025
2100d75
fix: expose relmha_causal, flash attention
nglehuy Jan 25, 2025
a5d1e84
fix: allow ctc to force use native tf impl
nglehuy Jan 25, 2025
5ff3163
chore: list devices
nglehuy Jan 25, 2025
3dbab33
fix: attention mask
nglehuy Feb 9, 2025
1545543
fix: general layers to show outputshape, invalid loss show outputs
nglehuy Feb 15, 2025
5f784b7
fix: models configs
nglehuy Feb 15, 2025
70ac41e
fix: config streaming
nglehuy Feb 20, 2025
eebc361
fix: configs
nglehuy Feb 23, 2025
6b0bec4
fix: configs
nglehuy Feb 23, 2025
2ac8e7f
Merge branch 'main' into feat-streaming
nglehuy Mar 9, 2025
7dcd145
fix: streaming masking mha
nglehuy Mar 9, 2025
e209040
fix: streaming masking mha
nglehuy Mar 9, 2025
5649fdd
fix: update mha attention mask
nglehuy Mar 13, 2025
26c4a5f
feat: add support for layer norm in conformer conv module
nglehuy Mar 13, 2025
4f77a52
chore: update configs
nglehuy Mar 13, 2025
c3ab865
fix: feature extraction layer dtype tf.float32 to ensure loss converg…
nglehuy Mar 17, 2025
778c1a2
fix: ctc loss tpu - case logits to float32
nglehuy Mar 17, 2025
e68ceee
fix: use auto mask
nglehuy Mar 19, 2025
f1e2a88
fix: pad logits length to label length
nglehuy Mar 20, 2025
f1a0ed6
fix: ctc loss tpu
nglehuy Mar 20, 2025
6f7f246
chore: config
nglehuy Mar 21, 2025
d538e69
fix: disable bias/activity regularizer as not needed
nglehuy Mar 21, 2025
7611ff8
chore: config
nglehuy Mar 23, 2025
0c8e7c1
chore: setup mxp
nglehuy Mar 24, 2025
56d2afa
chore: setup mxp
nglehuy Mar 24, 2025
454163c
fix: small kaggle
nglehuy Mar 25, 2025
a333bfc
chore: transformer-ctc streaming
nglehuy Mar 25, 2025
cf435a3
chore: config
nglehuy Mar 27, 2025
c35af45
fix: ctc-tpu clean label
nglehuy Mar 30, 2025
0556481
chore: configs
nglehuy Mar 30, 2025
3e88f65
chore: configs
nglehuy Mar 30, 2025
111f3ac
chore: configs
nglehuy Mar 30, 2025
8ee9813
fix: train step
nglehuy Mar 30, 2025
dde7760
fix: apply ga loss division before loss scaling
nglehuy Mar 30, 2025
aade071
fix: update train function with ga steps
nglehuy Mar 30, 2025
dc0c304
fix: update train step ga
nglehuy Mar 30, 2025
d541928
chore: configs
nglehuy Mar 30, 2025
91a39a2
chore: configs
nglehuy Mar 30, 2025
2a40da6
chore: configs
nglehuy Mar 30, 2025
8bcf0f3
chore: configs
nglehuy Mar 30, 2025
de58fed
fix: rnn kwargs
nglehuy Mar 30, 2025
a05494a
chore: update
nglehuy Mar 31, 2025
076a6f9
fix: update masking and layer
nglehuy Apr 5, 2025
51a5258
fix: update masking and layer
nglehuy Apr 5, 2025
a9218d9
fix: make function
nglehuy Apr 5, 2025
ce6752b
fix: use default make function
nglehuy Apr 5, 2025
90e6695
fix: soft device placement
nglehuy Apr 5, 2025
f4e459d
fix: option TF_CUDNN
nglehuy Apr 5, 2025
7473458
chore: logging
nglehuy Apr 5, 2025
57aef49
fix: configs
nglehuy Apr 5, 2025
a2eaf15
fix: feature extraction layer dtype
nglehuy Apr 5, 2025
5a1f054
fix: numeric stability with dtype compatible
nglehuy Apr 6, 2025
b70bdd6
chore: summary
nglehuy Apr 6, 2025
edf5eb2
fix: softmax numberic overflow with mask
nglehuy Apr 6, 2025
81a336c
chore: remove commented code
nglehuy Apr 6, 2025
6be5428
chore: config
nglehuy Apr 6, 2025
0d75686
chore: remove commented code
nglehuy Apr 7, 2025
f5886a5
fix: use keras-nightly
nglehuy Apr 8, 2025
e524af6
fix: requirements
nglehuy Apr 11, 2025
20d30c6
fix: requirements
nglehuy Apr 12, 2025
7f145f7
fix: requirements with docker
nglehuy Apr 12, 2025
2d557cc
fix: kaggle backup & restore callback
nglehuy Apr 14, 2025
6dc3410
fix: deps
nglehuy Apr 17, 2025
36f3cc0
fix: deps
nglehuy Apr 17, 2025
cea79ef
fix: deps
nglehuy Apr 17, 2025
5e31671
fix: deps
nglehuy Apr 17, 2025
6ca9c61
fix: deps
nglehuy Apr 17, 2025
63f6280
fix: file util
nglehuy Apr 22, 2025
4390d1b
fix: env util
nglehuy Apr 22, 2025
64aeb2f
fix: add setup.sh script, update make function
nglehuy Apr 22, 2025
65fdf46
fix: update kaggel callback
nglehuy Apr 22, 2025
c8d3614
fix: update gradient accumulation
nglehuy Apr 22, 2025
1eb2d15
fix: gradient accumulation
nglehuy Apr 23, 2025
1ae5a66
fix: deps
nglehuy Apr 23, 2025
e88e99a
fix: config
nglehuy Apr 23, 2025
38f641c
fix: backup and restore callback
nglehuy Apr 25, 2025
3bbfd77
chore: config
nglehuy Apr 25, 2025
6a90a51
fix: train script
nglehuy Apr 25, 2025
2e2d6e4
fix: configs, add gradn step
nglehuy Apr 25, 2025
aa15483
fix: configs
nglehuy Apr 25, 2025
d36085e
fix: gradn
nglehuy Apr 25, 2025
98390fc
fix: config
nglehuy Apr 25, 2025
c15e841
fix: config
nglehuy Apr 26, 2025
23dd668
fix: disable tqdm, logging for kagglehub
nglehuy Apr 27, 2025
af93c2b
chore: unittest
nglehuy Apr 27, 2025
dd17fb6
feat: refactor tokenizer, dataset with custom build vocabulary and
nglehuy May 1, 2025
bb3b848
fix: scripts
nglehuy May 5, 2025
7d3c954
fix: callback
nglehuy May 5, 2025
1dc854e
fix: deps
nglehuy May 5, 2025
c5024ac
fix: update vocab generator
nglehuy May 6, 2025
b325143
fix: update vocab generator
nglehuy May 6, 2025
7d2f029
fix: update vocab generator
nglehuy May 6, 2025
3a5b511
chore: vietbud500 metadata
nglehuy May 6, 2025
2 changes: 2 additions & 0 deletions .dockerignore
@@ -1,2 +1,4 @@
LibriSpeech
Models
.venv*
venv*
7 changes: 6 additions & 1 deletion .pylintrc
@@ -3,7 +3,7 @@
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
# run arbitrary code.
extension-pkg-allow-list=pydantic,tensorflow
extension-pkg-allow-list=pydantic

# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
@@ -120,6 +120,11 @@ disable=too-few-public-methods,
consider-using-f-string,
fixme,
unused-variable,
pointless-string-statement,
too-many-lines,
abstract-method,
too-many-ancestors,
import-outside-toplevel,

# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
66 changes: 30 additions & 36 deletions .vscode/settings.json
@@ -1,37 +1,31 @@
{
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter"
},
"autoDocstring.docstringFormat": "numpy",
"black-formatter.args": [
"--config",
"${workspaceFolder}/pyproject.toml"
],
"black-formatter.path": [
"${interpreter}",
"-m",
"black"
],
"editor.codeActionsOnSave": {
"source.fixAll": "explicit",
"source.organizeImports": "explicit"
},
"editor.formatOnSave": true,
"isort.args": [
"--settings-file",
"${workspaceFolder}/pyproject.toml"
],
"pylint.args": [
"--rcfile=${workspaceFolder}/.pylintrc"
],
"pylint.path": [
"${interpreter}",
"-m",
"pylint"
],
"python.analysis.fixAll": [
"source.unusedImports",
"source.convertImportFormat"
],
"python.analysis.importFormat": "absolute"
}
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.tabSize": 4
},
"[markdown]": {
"editor.tabSize": 2,
"editor.indentSize": 2,
"editor.detectIndentation": false
},
"[json]": {
"editor.tabSize": 2
},
"[yaml]": {
"editor.tabSize": 2
},
"autoDocstring.docstringFormat": "numpy",
"black-formatter.args": ["--config", "${workspaceFolder}/pyproject.toml"],
"black-formatter.path": ["${interpreter}", "-m", "black"],
"editor.codeActionsOnSave": {
"source.fixAll": "explicit",
"source.organizeImports": "explicit"
},
"editor.formatOnSave": true,
"isort.args": ["--settings-file", "${workspaceFolder}/pyproject.toml"],
"pylint.args": ["--rcfile=${workspaceFolder}/.pylintrc"],
"pylint.path": ["${interpreter}", "-m", "pylint"],
"python.analysis.fixAll": ["source.unusedImports", "source.convertImportFormat"],
"python.analysis.importFormat": "absolute",
"markdown.extension.list.indentationSize": "inherit"
}
8 changes: 4 additions & 4 deletions Dockerfile
@@ -1,4 +1,4 @@
FROM tensorflow/tensorflow:2.3.2-gpu
FROM tensorflow/tensorflow:2.18.0-gpu

RUN apt-get update \
&& apt-get upgrade -y \
@@ -9,8 +9,8 @@ RUN apt-get update \
RUN apt clean && apt-get clean

# Install dependencies
COPY requirements.txt /
RUN pip --no-cache-dir install -r /requirements.txt
COPY requirements*.txt /
RUN pip --no-cache-dir install -r /requirements.txt -r /requirements.cuda.txt

# Install rnnt_loss
COPY scripts /scripts
@@ -21,4 +21,4 @@ RUN if [ "$install_rnnt_loss" = "true" ] ; \
&& ./scripts/install_rnnt_loss.sh \
else echo 'Using pure TensorFlow'; fi

RUN echo "export LD_LIBRARY_PATH=/usr/local/cuda-10.2/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" >> /root/.bashrc
RUN echo "export LD_LIBRARY_PATH=/usr/local/cuda/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" >> /root/.bashrc
46 changes: 14 additions & 32 deletions README.md
@@ -34,7 +34,6 @@ TensorFlowASR implements some automatic speech recognition architectures such as
- [Installing from source (recommended)](#installing-from-source-recommended)
- [Installing via PyPi](#installing-via-pypi)
- [Installing for development](#installing-for-development)
- [Install for Apple Sillicon](#install-for-apple-sillicon)
- [Running in a container](#running-in-a-container)
- [Training \& Testing Tutorial](#training--testing-tutorial)
- [Features Extraction](#features-extraction)
@@ -74,62 +73,46 @@ TensorFlowASR implements some automatic speech recognition architectures such as

For training and testing, you should use `git clone` to install necessary packages from other authors (`ctc_decoders`, `rnnt_loss`, etc.)

**NOTE ONLY FOR APPLE SILICON**: TensorFlowASR requires python >= 3.12

See the `requirements.[extra].txt` files for extra dependencies

### Installing from source (recommended)

```bash
git clone https://github.com/TensorSpeech/TensorFlowASR.git
cd TensorFlowASR
# Tensorflow 2.x (with 2.x.x >= 2.5.1)
pip3 install ".[tf2.x]" # or ".[tf2.x-gpu]"
pip3 install -e . # or ".[cuda]" if using GPU
```

For anaconda3:
For **anaconda3**:

```bash
conda create -y -n tfasr tensorflow-gpu python=3.8 # tensorflow if using CPU, this makes sure conda install all dependencies for tensorflow
conda create -y -n tfasr python=3.11 # tensorflow if using CPU, this makes sure conda install all dependencies for tensorflow
conda activate tfasr
pip install -U tensorflow-gpu # upgrade to latest version of tensorflow
git clone https://github.com/TensorSpeech/TensorFlowASR.git
cd TensorFlowASR
# Tensorflow 2.x (with 2.x.x >= 2.5.1)
pip3 install ".[tf2.x]" # or ".[tf2.x-gpu]"
pip3 install -e . # or ".[cuda]" if using GPU
```

### Installing via PyPi
For **colab with TPUs**:

```bash
# Tensorflow 2.x (with 2.x >= 2.3)
pip3 install "TensorFlowASR[tf2.x]" # or pip3 install "TensorFlowASR[tf2.x-gpu]"
pip3 install -e ".[tpu]" -f https://storage.googleapis.com/libtpu-tf-releases/index.html
```

### Installing for development
### Installing via PyPi

```bash
git clone https://github.com/TensorSpeech/TensorFlowASR.git
cd TensorFlowASR
pip3 install -e ".[dev]"
pip3 install -e ".[tf2.x]" # or ".[tf2.x-gpu]" or ".[tf2.x-apple]" for apple m1 machine
pip3 install "TensorFlowASR" # or "TensorFlowASR[cuda]" if using GPU
```

### Install for Apple Sillicon

Because tensorflow-text is not built for Apple Sillicon, we need to install it using the prebuilt wheel file from [sun1638650145/Libraries-and-Extensions-for-TensorFlow-for-Apple-Silicon](https://github.com/sun1638650145/Libraries-and-Extensions-for-TensorFlow-for-Apple-Silicon)
### Installing for development

```bash
git clone https://github.com/TensorSpeech/TensorFlowASR.git
cd TensorFlowASR
pip3 install -e "." # or pip3 install -e ".[dev] for development # or pip3 install "TensorFlowASR[dev]" from PyPi
pip3 install tensorflow~=2.14.0 # change minor version if you want
```

Do this after installing TensorFlowASR with tensorflow above

```bash
TF_VERSION="$(python3 -c 'import tensorflow; print(tensorflow.__version__)')" && \
TF_VERSION_MAJOR="$(echo $TF_VERSION | cut -d'.' -f1,2)" && \
PY_VERSION="$(python3 -c 'import platform; major, minor, patch = platform.python_version_tuple(); print(f"{major}{minor}");')" && \
URL="https://github.com/sun1638650145/Libraries-and-Extensions-for-TensorFlow-for-Apple-Silicon" && \
pip3 install "${URL}/releases/download/v${TF_VERSION_MAJOR}/tensorflow_text-${TF_VERSION_MAJOR}.0-cp${PY_VERSION}-cp${PY_VERSION}-macosx_11_0_arm64.whl"
pip3 install -e ".[apple,dev]"
```

### Running in a container
@@ -139,7 +122,6 @@ docker-compose up -d
```



## Training & Testing Tutorial

- For training, please read [tutorial_training](./docs/tutorials/training.md)
11 changes: 5 additions & 6 deletions docs/tokenizers.md
@@ -1,27 +1,26 @@
# Tokenizers

- [Tokenizers](#tokenizers)
- [1. Character Tokenizer](#1-character-tokenizer)
- [2. Wordpiece Tokenizer](#2-wordpiece-tokenizer)
- [3. Sentencepiece Tokenizer](#3-sentencepiece-tokenizer)

# Tokenizers

## 1. Character Tokenizer

See [librispeech config](../examples/configs/librispeech/characters/char.yml.j2)
See [librispeech config](../examples/datasets/librispeech/characters/char.yml.j2)

This splits the text into characters and then maps each character to an index. Indices start from 1, and 0 is reserved for the blank token. This tokenizer is only suitable for languages that have a small number of characters, where each character is not a combination of other characters, for example English, Vietnamese, etc.
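
To make the mapping concrete, here is a minimal sketch of character-level encoding (illustrative only, using a toy vocabulary; this is not TensorFlowASR's actual API):

```python
# Toy character tokenizer: index 0 is reserved for the blank token,
# so character indices start at 1.
vocab = ["a", "b", "c", " "]  # hypothetical tiny vocabulary
char_to_index = {c: i + 1 for i, c in enumerate(vocab)}  # 0 = blank

def encode(text: str) -> list[int]:
    # Unknown characters are simply skipped in this sketch.
    return [char_to_index[c] for c in text if c in char_to_index]

print(encode("ab c"))  # [1, 2, 4, 3]
```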

## 2. Wordpiece Tokenizer

See [librispeech config](../examples/configs/librispeech/wordpiece/wp.yml.j2) for wordpiece split by whitespace
See [librispeech config](../examples/datasets/librispeech/wordpiece/wp.yml.j2) for wordpiece split by whitespace

See [librispeech config](../examples/configs/librispeech/wordpiece/wp_whitespace.yml.j2) for wordpiece where whitespace is a separate token
See [librispeech config](../examples/datasets/librispeech/wordpiece/wp_whitespace.yml.j2) for wordpiece where whitespace is a separate token

This splits the text into words and then splits each word into subwords, which are then mapped to indices. The blank token can be set to `<unk>` at index 0. This tokenizer works for languages that have a large number of words, where each word can be a combination of other words, so it can be applied to any language.
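
As an illustration, here is a greedy longest-match wordpiece split over a toy subword vocabulary (a sketch of the general idea only, not the exact algorithm or API used by this project):

```python
# Toy greedy longest-match wordpiece split; "##" marks word-internal pieces.
SUBWORDS = {"un", "##believ", "##able"}

def wordpiece_split(word: str) -> list[str]:
    pieces, start = [], 0
    while start < len(word):
        end = len(word)
        while end > start:
            piece = word[start:end] if start == 0 else "##" + word[start:end]
            if piece in SUBWORDS:
                pieces.append(piece)
                break
            end -= 1
        if end == start:
            return ["<unk>"]  # no subword matched
        start = end
    return pieces

print(wordpiece_split("unbelievable"))  # ['un', '##believ', '##able']
```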

## 3. Sentencepiece Tokenizer

See [librispeech config](../examples/configs/librispeech/sentencepiece/sp.yml.j2)
See [librispeech config](../examples/datasets/librispeech/sentencepiece/sp.yml.j2)

This splits the whole sentence into subwords and then maps each subword to an index. The blank token can be set to `<unk>` at index 0. This tokenizer works for languages that have a large number of words, where each word can be a combination of other words, so it can be applied to any language.
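
For illustration, here is a greedy sketch of sentencepiece-style encoding, where whitespace is kept as a visible marker so the whole sentence can be split in one pass (real sentencepiece trains a subword model rather than using a fixed table; this toy version only shows the input/output shape):

```python
# Toy sentencepiece-style encoding: "▁" marks (leading) whitespace,
# and the whole sentence is encoded at once; 0 is <unk>/blank.
VOCAB = {"<unk>": 0, "▁hello": 1, "▁wor": 2, "ld": 3}

def sp_encode(sentence: str) -> list[int]:
    s = "▁" + sentence.replace(" ", "▁")
    ids, start = [], 0
    while start < len(s):
        for end in range(len(s), start, -1):  # greedy longest match
            if s[start:end] in VOCAB:
                ids.append(VOCAB[s[start:end]])
                start = end
                break
        else:
            ids.append(VOCAB["<unk>"])  # fall back to <unk> for one char
            start += 1
    return ids

print(sp_encode("hello world"))  # [1, 2, 3]
```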
18 changes: 13 additions & 5 deletions docs/tutorials/testing.md
@@ -1,3 +1,11 @@
- [Testing Tutorial](#testing-tutorial)
- [1. Install packages](#1-install-packages)
- [2. Prepare transcripts files](#2-prepare-transcripts-files)
- [3. Prepare config file](#3-prepare-config-file)
- [4. \[Optional\]\[Required if not exists\] Generate vocabulary and metadata](#4-optionalrequired-if-not-exists-generate-vocabulary-and-metadata)
- [5. Run testing](#5-run-testing)


# Testing Tutorial

These commands are examples for the LibriSpeech dataset, but similar steps apply to other datasets
@@ -16,14 +24,14 @@ pip install ".[tf2.x]"
This is an example of preparing transcript files for the LibriSpeech corpus

```bash
python scripts/create_librispeech_trans.py \
tensorflow_asr utils create_librispeech_trans \
--directory=/path/to/dataset/test-clean \
--output=/path/to/dataset/test-clean/transcripts.tsv
```

Do the same thing with `test-clean`, `test-other`

For other datasets, you must prepare your own Python script like `scripts/create_librispeech_trans.py`

For other datasets, you must prepare your own Python script like `tensorflow_asr/scripts/utils/create_librispeech_trans.py`

## 3. Prepare config file

@@ -38,7 +46,7 @@ The config file is the same as the config used for training
Use the same vocabulary file used in training

```bash
python scripts/prepare_vocab_and_metadata.py \
tensorflow_asr utils prepare_vocab_and_metadata \
--config-path=/path/to/config.yml.j2 \
--datadir=/path/to/datadir
```
@@ -48,12 +56,12 @@ The inputs, outputs and other options of vocabulary are defined in the config fi
## 5. Run testing

```bash
python examples/test.py \
tensorflow_asr test \
--config-path /path/to/config.yml.j2 \
--dataset_type slice \
--datadir /path/to/datadir \
--outputdir /path/to/modeldir/tests \
--h5 /path/to/modeldir/weights.h5
## See other params
python examples/test.py --help
tensorflow_asr test --help
```
4 changes: 2 additions & 2 deletions docs/tutorials/tflite.md
@@ -11,14 +11,14 @@
## Conversion

```bash
python3 examples/tflite.py \
tensorflow_asr tflite \
--config-path=/path/to/config.yml.j2 \
--h5=/path/to/weight.h5 \
--bs=1 \ # Batch size
--beam-width=0 \ # Beam width, set >0 to enable beam search
--output=/path/to/output.tflite
## See other params
python examples/tflite.py --help
tensorflow_asr tflite --help
```

## Inference
21 changes: 15 additions & 6 deletions docs/tutorials/training.md
@@ -1,3 +1,12 @@
- [Training Tutorial](#training-tutorial)
- [1. Install packages](#1-install-packages)
- [2. Prepare transcripts files](#2-prepare-transcripts-files)
- [3. Prepare config file](#3-prepare-config-file)
- [4. \[Optional\]\[Required if using TPUs\] Create tfrecords](#4-optionalrequired-if-using-tpus-create-tfrecords)
- [5. Generate vocabulary and metadata](#5-generate-vocabulary-and-metadata)
- [6. Run training](#6-run-training)


# Training Tutorial

These commands are examples for the LibriSpeech dataset, but similar steps apply to other datasets
@@ -16,14 +25,14 @@ pip install ".[tf2.x]"
This is an example of preparing transcript files for the LibriSpeech corpus

```bash
python scripts/create_librispeech_trans.py \
tensorflow_asr utils create_librispeech_trans \
--directory=/path/to/dataset/train-clean-100 \
--output=/path/to/dataset/train-clean-100/transcripts.tsv
```

Do the same thing with `train-clean-360`, `train-other-500`, `dev-clean`, `dev-other`, `test-clean`, `test-other`

For other datasets, you must prepare your own Python script like `scripts/create_librispeech_trans.py`

For other datasets, you must prepare your own Python script like `tensorflow_asr/scripts/utils/create_librispeech_trans.py`

## 3. Prepare config file

@@ -34,7 +43,7 @@ Please take a look in some examples for config files in `examples/*/*.yml.j2`
## 4. [Optional][Required if using TPUs] Create tfrecords

```bash
python scripts/create_tfrecords.py \
tensorflow_asr utils create_tfrecords \
--config-path=/path/to/config.yml.j2 \
--mode=\["train","eval","test"\] \
--datadir=/path/to/datadir
@@ -47,7 +56,7 @@ You can reduce the flag `--modes` to `--modes=\["train","eval"\]` to only create
This step requires defining path to vocabulary file and other options for generating vocabulary in config file.

```bash
python scripts/prepare_vocab_and_metadata.py \
tensorflow_asr utils prepare_vocab_and_metadata \
--config-path=/path/to/config.yml.j2 \
--datadir=/path/to/datadir
```
@@ -58,13 +67,13 @@ The inputs, outputs and other options of vocabulary are defined in the config fi
## 6. Run training

```bash
python examples/train.py \
tensorflow_asr train \
--mxp=auto \
--jit-compile \
--config-path=/path/to/config.yml.j2 \
--dataset-type=tfrecord \
--modeldir=/path/to/modeldir \
--datadir=/path/to/datadir
## See other params
python examples/train.py --help
tensorflow_asr train --help
```
@@ -1,5 +1,5 @@
{% set vocabsize = 29 %}
{% set vocabprefix = repodir ~ "/examples/configs/librispeech/characters/english" %}
{% set vocabprefix = repodir ~ "/examples/datasets/librispeech/characters/english" %}
{% set metadata = vocabprefix ~ ".metadata.json" %}

decoder_config:
Expand All @@ -9,6 +9,7 @@ decoder_config:
norm_score: True
lm_config: null
vocabulary: {{vocabprefix}}.vocab
vocab_size: {{vocabsize}}

{% import "examples/configs/librispeech/data.yml.j2" as data_config with context %}
{% import "examples/datasets/librispeech/config.yml.j2" as data_config with context %}
{{data_config}}