How a TensorFlow Pipeline Is Used in a Neural Network

How to create batches
How to create batches and pass them to a neural network

How to create batches

import tensorflow as tf

# Simulate a dataset of integers from 0 to 99
data = tf.data.Dataset.range(100)

# Shuffle, batch, and map transformations
pipeline = (
    data
    .shuffle(buffer_size=100)  # Randomize the data
    .batch(10)                 # Batch in groups of 10
    .map(lambda x: x * 2)      # Multiply each element by 2
    .prefetch(buffer_size=tf.data.AUTOTUNE)  # Prefetch for efficiency
)

# Print the output from each batch
for batch in pipeline:
    print(batch.numpy())


Output:

[60 36 82 58 76 56 26 74 62 18]
[94 84 22 10 80 66 48 50  2 88]
...
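Note that the order of transformations matters: because map comes after batch in the pipeline above, the doubling runs once per batch of 10 elements (vectorized), not once per element. The variant below is a minimal sketch of the same pipeline with map applied before batch; it produces equivalent batches but is usually slower for cheap element-wise operations.

import tensorflow as tf

# Same data, but map runs before batch: the function is applied
# to one scalar at a time instead of to a whole batch tensor
per_element = (
    tf.data.Dataset.range(100)
    .shuffle(buffer_size=100)
    .map(lambda x: x * 2)   # Per-element doubling
    .batch(10)
    .prefetch(tf.data.AUTOTUNE)
)

for batch in per_element.take(1):
    print(batch.numpy())  # Ten doubled values, e.g. [14 108 52 ...]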

Another Example: A Text Pipeline

import tensorflow as tf

# Example dataset: List of sentences and corresponding labels
sentences = [
    "TensorFlow is great for machine learning",
    "Natural language processing is fun",
    "I love creating deep learning models",
    "Transformers have revolutionized NLP",
    "TensorFlow Hub provides pre-trained models"
]
labels = [1, 0, 1, 0, 1]  # Example binary labels

# Create a tf.data Dataset from sentences and labels
data = tf.data.Dataset.from_tensor_slices((sentences, labels))

# Text vectorization layer to tokenize and convert text to sequences
vectorizer = tf.keras.layers.TextVectorization(output_mode='int', max_tokens=1000)
vectorizer.adapt(sentences)  # Fit the vectorizer on the data

# Pipeline: Shuffle, batch, tokenize, and prefetch
pipeline = (
    data
    .shuffle(buffer_size=5)  # Shuffle the sentences
    .batch(2)                # Batch data in groups of 2
    .map(lambda x, y: (vectorizer(x), y))  # Tokenize sentences
    .prefetch(buffer_size=tf.data.AUTOTUNE)  # Prefetch for efficiency
)

# Print the output from each batch
for batch, batch_labels in pipeline:
    print("Batch (tokenized):", batch.numpy())
    print("Labels:", batch_labels.numpy())

Output: each batch is a padded matrix of token IDs plus the matching labels.
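Before training, you can sanity-check what the pipeline yields with element_spec. For this particular pipeline, TextVectorization emits int64 token IDs, and the sequence dimension is None because each batch is padded to its own longest sentence:

print(pipeline.element_spec)
# e.g.:
# (TensorSpec(shape=(None, None), dtype=tf.int64, name=None),
#  TensorSpec(shape=(None,), dtype=tf.int32, name=None))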

How to create batches and pass them to a neural network

We pass the pipeline straight to model.fit; Keras iterates it batch by batch during training:

model.fit(pipeline, epochs=5)
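If you also want a validation set, the same tf.data style splits cleanly with take and skip. A minimal sketch, assuming an illustrative 80/20 split on the numeric dataset from the first example (reshuffle_each_iteration=False keeps the two subsets disjoint across epochs):

dataset = tf.data.Dataset.range(100).shuffle(100, reshuffle_each_iteration=False)
train_ds = dataset.take(80).batch(10).prefetch(tf.data.AUTOTUNE)
val_ds = dataset.skip(80).batch(10).prefetch(tf.data.AUTOTUNE)

# model.fit(train_ds, validation_data=val_ds, epochs=5)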

Text Classification Example
For text classification, we'll use the pipeline and pass batches to a simple neural network. Here, we’ll build a binary classification model using an embedding layer and a few dense layers.

import tensorflow as tf
from tensorflow.keras import layers

# Example sentences and labels
sentences = [
    "TensorFlow is great for machine learning",
    "Natural language processing is fun",
    "I love creating deep learning models",
    "Transformers have revolutionized NLP",
    "TensorFlow Hub provides pre-trained models"
]
labels = [1, 0, 1, 0, 1]  # Example binary labels

# Create tf.data Dataset and TextVectorization layer
data = tf.data.Dataset.from_tensor_slices((sentences, labels))
vectorizer = tf.keras.layers.TextVectorization(output_mode='int', max_tokens=1000)
vectorizer.adapt(sentences)

# Define the pipeline
pipeline = (
    data
    .shuffle(buffer_size=5)
    .batch(2)
    .map(lambda x, y: (vectorizer(x), y))
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

# Define a simple text classification model
model = tf.keras.Sequential([
    layers.Embedding(input_dim=1000, output_dim=64),
    layers.GlobalAveragePooling1D(),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid')  # Sigmoid for binary classification
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(pipeline, epochs=5)

# Evaluate the model (optional)
loss, accuracy = model.evaluate(pipeline)
print(f"Loss: {loss}, Accuracy: {accuracy}")

# Function to classify new input text
def classify_text(input_text):
    # Process the input text
    vectorized_text = vectorizer(tf.constant([input_text]))  # Vectorize the input
    prediction = model.predict(vectorized_text)  # Make prediction
    predicted_class = 1 if prediction[0][0] > 0.5 else 0  # Convert probability to class label
    return predicted_class, prediction[0][0]  # Return class and probability

# Example usage of the classify function
new_sentence = "I enjoy learning about AI and machine learning"
predicted_class, probability = classify_text(new_sentence)
print(f"Input Text: '{new_sentence}'")
print(f"Predicted Class: {predicted_class}, Probability: {probability:.4f}")

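One detail worth knowing: TextVectorization pads each batch to the length of its longest sentence, so batch shapes can vary from step to step. If the model needs a fixed input size, output_sequence_length pins every sequence to one length. A minimal sketch (the fixed_len_vectorizer name and the length of 8 are illustrative choices, not part of the example above):

fixed_len_vectorizer = tf.keras.layers.TextVectorization(
    output_mode='int', max_tokens=1000, output_sequence_length=8)
fixed_len_vectorizer.adapt(sentences)

print(fixed_len_vectorizer(tf.constant(["TensorFlow is great"])).numpy())
# One row of 8 token IDs, zero-padded on the right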

Text Summarization Example
For text summarization, we train a small GRU-based encoder-decoder (sequence-to-sequence) model on sentence-summary pairs, using separate TextVectorization layers for the inputs and the summaries.

import tensorflow as tf
from tensorflow.keras import layers

# Example sentences and corresponding summaries
sentences = [
    "TensorFlow is great for machine learning",
    "Natural language processing is fun",
    "I love creating deep learning models",
    "Transformers have revolutionized NLP",
    "TensorFlow Hub provides pre-trained models"
]
summaries = [
    "ML with TensorFlow",
    "Fun with NLP",
    "Deep learning models",
    "NLP with transformers",
    "Pre-trained models on TF Hub"
]

# Create tf.data Dataset and TextVectorization layers
data = tf.data.Dataset.from_tensor_slices((sentences, summaries))
input_vectorizer = tf.keras.layers.TextVectorization(output_mode='int', max_tokens=1000)
output_vectorizer = tf.keras.layers.TextVectorization(output_mode='int', max_tokens=1000)

input_vectorizer.adapt(sentences)
output_vectorizer.adapt(summaries)

# Define the pipeline
pipeline = (
    data
    .shuffle(buffer_size=5)
    .batch(2)
    .map(lambda x, y: (input_vectorizer(x), output_vectorizer(y)))
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

# Define the sequence-to-sequence model
class Seq2SeqModel(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, units):
        super().__init__()
        self.encoder_embedding = layers.Embedding(vocab_size, embedding_dim)
        self.encoder_gru = layers.GRU(units, return_sequences=True, return_state=True)

        self.decoder_embedding = layers.Embedding(vocab_size, embedding_dim)
        self.decoder_gru = layers.GRU(units, return_sequences=True, return_state=True)
        self.dense = layers.Dense(vocab_size)

    def call(self, inputs, training=False):
        encoder_input, decoder_input = inputs
        encoder_embedded = self.encoder_embedding(encoder_input)
        encoder_output, encoder_state = self.encoder_gru(encoder_embedded)

        decoder_embedded = self.decoder_embedding(decoder_input)
        decoder_output, _ = self.decoder_gru(decoder_embedded, initial_state=encoder_state)

        return self.dense(decoder_output)

vocab_size = 1000
embedding_dim = 64
units = 128

# Instantiate the model
model = Seq2SeqModel(vocab_size, embedding_dim, units)

# Compile the model (the Dense layer outputs raw logits, so use from_logits=True)
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

# Prepare the inputs and targets for training (shifted decoder inputs)
def prepare_pipeline(data):
    for enc_in, dec_out in data:
        dec_in = dec_out[:, :-1]  # Decoder input (except last token)
        dec_out_shifted = dec_out[:, 1:]  # Decoder target (except first token)
        yield (enc_in, dec_in), dec_out_shifted

# The pipeline already yields batches, so no extra .batch() call is needed;
# TextVectorization emits int64 token IDs, which the signature must match
train_data = tf.data.Dataset.from_generator(
    lambda: prepare_pipeline(pipeline),
    output_signature=(
        (tf.TensorSpec(shape=(None, None), dtype=tf.int64),
         tf.TensorSpec(shape=(None, None), dtype=tf.int64)),
        tf.TensorSpec(shape=(None, None), dtype=tf.int64),
    ))

# Train the model
model.fit(train_data, epochs=5)

# Function to generate a summary for a new sentence (greedy decoding)
def generate_summary(input_sentence, max_length=10):
    # Vectorize the input sentence
    input_seq = input_vectorizer(tf.constant([input_sentence]))
    vocab = output_vectorizer.get_vocabulary()

    # This toy vocabulary has no dedicated start/end tokens, so "start" maps
    # to the OOV index (1); a real model would reserve explicit special tokens
    start_token = int(output_vectorizer(tf.constant(["start"]))[0, 0].numpy())
    generated_tokens = [start_token]

    for _ in range(max_length):
        decoder_input = tf.constant([generated_tokens])  # Shape: (1, t)
        predictions = model.predict((input_seq, decoder_input), verbose=0)
        # Take the most likely token at the last decoding step
        predicted_token = int(tf.argmax(predictions[:, -1, :], axis=-1).numpy()[0])
        if predicted_token == 0:  # 0 is the padding index; treat it as "end"
            break
        generated_tokens.append(predicted_token)

    # Convert token indices back to words, skipping the seed token
    return ' '.join(vocab[token] for token in generated_tokens[1:]
                    if token < len(vocab))

# Example usage of the generate_summary function
new_sentence = "Exploring machine learning with TensorFlow"
predicted_summary = generate_summary(new_sentence)
print(f"Input Sentence: '{new_sentence}'")
print(f"Generated Summary: '{predicted_summary}'")
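The shifting done in prepare_pipeline is standard teacher forcing: at each position the decoder reads the summary up to token t and is trained to predict token t+1. A tiny worked example with made-up token IDs:

import tensorflow as tf

summary_ids = tf.constant([[5, 12, 7, 3]])  # Hypothetical tokenized summary
dec_in = summary_ids[:, :-1]                # [[5, 12, 7]]: what the decoder reads
dec_out = summary_ids[:, 1:]                # [[12, 7, 3]]: what it must predict
print(dec_in.numpy(), dec_out.numpy())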
