diff --git a/faster_whisper/transcribe.py b/faster_whisper/transcribe.py
index a31cf1e..bbff104 100644
--- a/faster_whisper/transcribe.py
+++ b/faster_whisper/transcribe.py
@@ -1,4 +1,5 @@
 import itertools
+import logging
 import os
 import zlib
 
@@ -11,7 +12,7 @@ import tokenizers
 from faster_whisper.audio import decode_audio
 from faster_whisper.feature_extractor import FeatureExtractor
 from faster_whisper.tokenizer import Tokenizer
-from faster_whisper.utils import download_model
+from faster_whisper.utils import download_model, format_timestamp, get_logger
 from faster_whisper.vad import (
     SpeechTimestampsMap,
     collect_chunks,
@@ -93,6 +94,8 @@ class WhisperModel:
             (concurrent calls to self.model.generate() will run in parallel).
             This can improve the global throughput at the cost of increased memory usage.
         """
+        self.logger = get_logger()
+
         if os.path.isdir(model_size_or_path):
             model_path = model_size_or_path
         else:
@@ -211,17 +214,40 @@ class WhisperModel:
             - a generator over transcribed segments
             - an instance of AudioInfo
         """
-        if not isinstance(audio, np.ndarray):
-            audio = decode_audio(
-                audio, sampling_rate=self.feature_extractor.sampling_rate
-            )
+        sampling_rate = self.feature_extractor.sampling_rate
 
-        duration = audio.shape[0] / self.feature_extractor.sampling_rate
+        if not isinstance(audio, np.ndarray):
+            audio = decode_audio(audio, sampling_rate=sampling_rate)
+
+        duration = audio.shape[0] / sampling_rate
+
+        self.logger.info(
+            "Processing audio with duration %s", format_timestamp(duration)
+        )
 
         if vad_filter:
             vad_parameters = {} if vad_parameters is None else vad_parameters
             speech_chunks = get_speech_timestamps(audio, **vad_parameters)
             audio = collect_chunks(audio, speech_chunks)
+
+            self.logger.info(
+                "VAD filter removed %s of audio",
+                format_timestamp(duration - (audio.shape[0] / sampling_rate)),
+            )
+
+            if self.logger.isEnabledFor(logging.DEBUG):
+                self.logger.debug(
+                    "VAD filter kept the following audio segments: %s",
+                    ", ".join(
+                        "[%s -> %s]"
+                        % (
+                            format_timestamp(chunk["start"] / sampling_rate),
+                            format_timestamp(chunk["end"] / sampling_rate),
+                        )
+                        for chunk in speech_chunks
+                    ),
+                )
+
         else:
             speech_chunks = None
 
@@ -239,6 +265,12 @@ class WhisperModel:
             results = self.model.detect_language(encoder_output)
             language_token, language_probability = results[0][0]
             language = language_token[2:-2]
+
+            self.logger.info(
+                "Detected language '%s' with probability %.2f",
+                language,
+                language_probability,
+            )
         else:
             language_probability = 1
 
@@ -275,9 +307,7 @@ class WhisperModel:
         segments = self.generate_segments(features, tokenizer, options, encoder_output)
 
         if speech_chunks:
-            segments = restore_speech_timestamps(
-                segments, speech_chunks, self.feature_extractor.sampling_rate
-            )
+            segments = restore_speech_timestamps(segments, speech_chunks, sampling_rate)
 
         audio_info = AudioInfo(
             language=language,
@@ -312,6 +342,11 @@ class WhisperModel:
             )
             segment_duration = segment_size * self.feature_extractor.time_per_frame
 
+            if self.logger.isEnabledFor(logging.DEBUG):
+                self.logger.debug(
+                    "Processing segment at %s", format_timestamp(time_offset)
+                )
+
             previous_tokens = all_tokens[prompt_reset_since:]
             prompt = self.get_prompt(
                 tokenizer,
@@ -339,6 +374,12 @@ class WhisperModel:
                 ):
                     should_skip = False
                 if should_skip:
+                    self.logger.debug(
+                        "No speech threshold is met (%f > %f)",
+                        result.no_speech_prob,
+                        options.no_speech_threshold,
+                    )
+
                     # fast-forward to the next segment boundary
                     seek += segment_size
                     continue
@@ -543,12 +584,26 @@ class WhisperModel:
             ):
                 needs_fallback = True  # too repetitive
 
+                self.logger.debug(
+                    "Compression ratio threshold is not met with temperature %.1f (%f > %f)",
+                    temperature,
+                    compression_ratio,
+                    options.compression_ratio_threshold,
+                )
+
             if (
                 options.log_prob_threshold is not None
                 and avg_log_prob < options.log_prob_threshold
             ):
                 needs_fallback = True  # average log probability is too low
 
+                self.logger.debug(
+                    "Log probability threshold is not met with temperature %.1f (%f < %f)",
+                    temperature,
+                    avg_log_prob,
+                    options.log_prob_threshold,
+                )
+
             if not needs_fallback:
                 break
 
diff --git a/faster_whisper/utils.py b/faster_whisper/utils.py
index 71ec9d5..66c7161 100644
--- a/faster_whisper/utils.py
+++ b/faster_whisper/utils.py
@@ -1,3 +1,4 @@
+import logging
 import os
 
 from typing import Optional
@@ -25,6 +26,11 @@
     return os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets")
 
 
+def get_logger():
+    """Returns the module logger."""
+    return logging.getLogger("faster_whisper")
+
+
 def download_model(size: str, output_dir: Optional[str] = None):
     """Downloads a CTranslate2 Whisper model from the Hugging Face Hub.
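
Usage note (not part of the patch): the new messages go to the standard-library logger named "faster_whisper" (returned by get_logger() above), which has no handler attached by default. A minimal sketch of how a caller might surface them, assuming only the stdlib logging module:

    import logging

    # Attach a basic stream handler and raise the package logger to DEBUG so that
    # the isEnabledFor(logging.DEBUG) guards in the patch also emit their messages.
    logging.basicConfig()
    logging.getLogger("faster_whisper").setLevel(logging.DEBUG)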