Real-Time Applications of NLP Using Pipeline-Based Text Classification

Applications of Text Classification

Sentiment Analysis: Detects whether the sentiment is positive, negative, or neutral.
Spam Detection: Classifies text as spam or not spam.
Emotion Detection: Identifies emotions like joy, anger, sadness, etc.
Toxic Comment Classification: Flags comments as toxic or non-toxic.
Fake News Detection: Distinguishes between fake and real news.
Intent Recognition: Classifies user input as a question, statement, or command.
Review Classification: Evaluates reviews as positive, negative, or neutral.
Hate Speech Detection: Flags hateful or offensive speech.
Subjectivity Analysis: Determines if a statement is subjective or objective.
Topic Classification: Categorizes text into predefined topics.

Note that the plain text-classification pipeline can only return the labels its underlying model was trained on, so it does not accept a custom label list. The tasks below that need custom label sets therefore use the zero-shot-classification pipeline and pass candidate_labels:

from transformers import pipeline

# Default text-classification pipeline (loads a pre-trained sentiment model)
sentiment_classifier = pipeline("text-classification")

# Zero-shot classification pipeline for tasks with custom label sets
classifier = pipeline("zero-shot-classification")

# 1. Sentiment Analysis
def sentiment_analysis(text):
    return sentiment_classifier(text)

# 2. Spam Detection
def spam_detection(text):
    categories = ["spam", "not spam"]
    return classifier(text, candidate_labels=categories)

# 3. Emotion Detection
def emotion_detection(text):
    categories = ["joy", "anger", "sadness", "fear", "surprise"]
    return classifier(text, candidate_labels=categories)

# 4. Toxic Comment Classification
def toxic_comment_detection(text):
    categories = ["toxic", "non-toxic"]
    return classifier(text, candidate_labels=categories)

# 5. Fake News Detection
def fake_news_detection(text):
    categories = ["fake", "real"]
    return classifier(text, candidate_labels=categories)

# 6. Intent Recognition
def intent_recognition(text):
    categories = ["question", "statement", "command"]
    return classifier(text, candidate_labels=categories)

# 7. Review Classification (Positive, Negative, Neutral)
def review_classification(text):
    categories = ["positive", "negative", "neutral"]
    return classifier(text, candidate_labels=categories)

# 8. Hate Speech Detection
def hate_speech_detection(text):
    categories = ["hate speech", "non-hate speech"]
    return classifier(text, candidate_labels=categories)

# 9. Subjectivity Analysis
def subjectivity_analysis(text):
    categories = ["subjective", "objective"]
    return classifier(text, candidate_labels=categories)

# 10. Topic Classification
def topic_classification(text):
    categories = ["technology", "health", "finance", "sports", "entertainment"]
    return classifier(text, candidate_labels=categories)

# Example Usage
if __name__ == "__main__":
    sample_text = "The new smartphone has amazing features and performance."

    print("1. Sentiment Analysis:", sentiment_analysis(sample_text))
    print("2. Spam Detection:", spam_detection("You won $1000! Click here to claim your prize."))
    print("3. Emotion Detection:", emotion_detection("I am so happy today!"))
    print("4. Toxic Comment Classification:", toxic_comment_detection("You are terrible at this game."))
    print("5. Fake News Detection:", fake_news_detection("The earth is flat."))
    print("6. Intent Recognition:", intent_recognition("What is the weather today?"))
    print("7. Review Classification:", review_classification("The movie was decent, but it could have been better."))
    print("8. Hate Speech Detection:", hate_speech_detection("I hate you and your ideas!"))
    print("9. Subjectivity Analysis:", subjectivity_analysis("In my opinion, this is the best book ever."))
    print("10. Topic Classification:", topic_classification("Artificial intelligence is transforming technology."))
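
The two pipelines return different shapes: text-classification returns a list of dictionaries with 'label' and 'score' keys, while zero-shot-classification returns a single dictionary whose 'labels' and 'scores' lists are sorted by confidence. A small helper can normalize both (top_label is a hypothetical name, not part of transformers):

# Illustrative helper: pull the best label and its score out of either output shape
def top_label(result):
    if isinstance(result, dict):  # zero-shot output: {'sequence', 'labels', 'scores'}
        return result["labels"][0], result["scores"][0]
    return result[0]["label"], result[0]["score"]  # text-classification output

print(top_label(spam_detection("You won $1000! Click here to claim your prize.")))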

Code Implementation for Automatic Blacklist Detection
Below is an example pipeline that automatically flags potentially harmful words or phrases using spaCy together with a pre-trained toxicity model (unitary/toxic-bert):

pip install transformers spacy
python -m spacy download en_core_web_sm
import spacy
from spacy.language import Language
from transformers import pipeline

# Load the spaCy English model
nlp = spacy.load("en_core_web_sm")

# Load a pre-trained transformer model for toxicity detection
toxicity_detector = pipeline("text-classification", model="unitary/toxic-bert")

# Register a custom pipeline component for automatic blacklist detection (spaCy v3 style)
@Language.component("blacklist_detector")
def blacklist_detector(doc):
    flagged = []

    # Analyze each token in the document
    for token in doc:
        # Classify each token with the toxicity model
        prediction = toxicity_detector(token.text)
        label = prediction[0]['label']
        score = prediction[0]['score']

        # Flag words classified as harmful with high confidence;
        # note that toxic-bert's labels are lowercase (e.g. "toxic")
        if label.lower() == "toxic" and score > 0.8:  # Adjust threshold as needed
            flagged.append(token.text)

    if flagged:
        print(f"Blacklisted words detected: {', '.join(flagged)}")

    return doc

# Add the blacklist detector to the pipeline by its registered name
nlp.add_pipe("blacklist_detector", last=True)

# Test the pipeline
text = "This is a fake email and it might be a scam. Stop spamming!"
doc = nlp(text)
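
Calling the transformer once per token is the slowest way to do this: the Hugging Face pipeline also accepts a list of strings, so the whole document can be scored in a single batched call. A minimal sketch under that assumption, reusing the toxicity model above:

# Batched variant: classify all tokens in one call (the 0.8 threshold is illustrative)
def blacklist_detector_batched(doc):
    words = [token.text for token in doc]
    predictions = toxicity_detector(words)
    flagged = [w for w, p in zip(words, predictions)
               if p["label"].lower() == "toxic" and p["score"] > 0.8]
    if flagged:
        print(f"Blacklisted words detected: {', '.join(flagged)}")
    return doc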

Zero-Shot Classification for Multiple Applications
Instead of a dedicated model per task, the zero-shot-classification pipeline scores whatever candidate labels you pass at inference time, so one model covers many applications:
from transformers import pipeline

# Initialize zero-shot-classification pipeline
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Define applications
def sentiment_analysis(text):
    labels = ["positive", "negative", "neutral"]
    return classifier(text, candidate_labels=labels)

def spam_detection(text):
    labels = ["spam", "not spam"]
    return classifier(text, candidate_labels=labels)

def emotion_detection(text):
    labels = ["joy", "anger", "sadness", "fear", "love", "surprise"]
    return classifier(text, candidate_labels=labels)

def intent_classification(text):
    labels = ["informative", "query", "request", "command"]
    return classifier(text, candidate_labels=labels)

def topic_classification(text):
    labels = ["technology", "health", "finance", "education", "entertainment", "sports"]
    return classifier(text, candidate_labels=labels)

def customer_feedback_analysis(text):
    labels = ["product complaint", "service complaint", "positive feedback", "neutral feedback"]
    return classifier(text, candidate_labels=labels)

def hate_speech_detection(text):
    labels = ["hate speech", "non-hate speech"]
    return classifier(text, candidate_labels=labels)

def urgency_detection(text):
    labels = ["urgent", "non-urgent"]
    return classifier(text, candidate_labels=labels)

def fake_news_detection(text):
    labels = ["fake news", "real news"]
    return classifier(text, candidate_labels=labels)

def product_recommendation(text):
    labels = ["electronics", "fashion", "books", "home appliances", "sports equipment"]
    return classifier(text, candidate_labels=labels)

# Example usage
if __name__ == "__main__":
    sample_text = "I love this product, but the delivery was delayed."

    print("1. Sentiment Analysis:", sentiment_analysis(sample_text))
    print("2. Spam Detection:", spam_detection("Win $1000 now! Click here."))
    print("3. Emotion Detection:", emotion_detection("I am so happy and excited today!"))
    print("4. Intent Classification:", intent_classification("Can you help me reset my password?"))
    print("5. Topic Classification:", topic_classification("Artificial intelligence is transforming technology."))
    print("6. Customer Feedback Analysis:", customer_feedback_analysis(sample_text))
    print("7. Hate Speech Detection:", hate_speech_detection("I hate everyone in this group."))
    print("8. Urgency Detection:", urgency_detection("Please respond as soon as possible!"))
    print("9. Fake News Detection:", fake_news_detection("The earth is flat."))
    print("10. Product Recommendation:", product_recommendation("Looking for the best smartphone under $500."))
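
By default the zero-shot pipeline applies a softmax across the candidate labels, so the scores sum to 1 and exactly one label dominates. When several labels can apply at once (emotion detection, for example), recent versions of transformers accept multi_label=True to score each label independently:

# Score each candidate label independently instead of softmaxing across them
result = classifier(
    "I am thrilled but also a little nervous about the launch.",
    candidate_labels=["joy", "fear", "anger", "sadness"],
    multi_label=True,
)
print(list(zip(result["labels"], [round(s, 3) for s in result["scores"]])))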

Use a Pre-Trained Model for Offensive Language Detection

import spacy
from spacy.language import Language
from spacy.tokens import Doc
from transformers import pipeline

# Load the spaCy English model
nlp = spacy.load("en_core_web_sm")

# Load a pre-trained model for offensive language detection
offensive_detector = pipeline("text-classification", model="unitary/toxic-bert")

# Register a custom pipeline component that censors offensive tokens (spaCy v3 style)
@Language.component("profanity_filter")
def profanity_filter(doc):
    tokens = []
    for token in doc:
        # Use the pre-trained model to classify the token
        prediction = offensive_detector(token.text)
        label = prediction[0]['label']
        score = prediction[0]['score']

        # Replace the token if classified as offensive with high confidence;
        # toxic-bert's labels are lowercase (e.g. "toxic")
        if label.lower() == "toxic" and score > 0.8:  # Adjust the threshold as needed
            tokens.append("***")
        else:
            tokens.append(token.text)

    # Store the censored text in a custom extension
    doc._.censored_text = " ".join(tokens)
    return doc

# Register the custom extension before running the pipeline
Doc.set_extension("censored_text", default=None)

# Add the profanity filter to the pipeline by its registered name
nlp.add_pipe("profanity_filter", last=True)

# Test the pipeline
text = "This is a really badword and offensive content."
doc = nlp(text)
print(f"Censored Text: {doc._.censored_text}")
