Applications of Text Classification
Sentiment Analysis: Detects whether the sentiment is positive, negative, or neutral.
Spam Detection: Classifies text as spam or not spam.
Emotion Detection: Identifies emotions such as joy, anger, and sadness.
Toxic Comment Classification: Flags comments as toxic or non-toxic.
Fake News Detection: Distinguishes between fake and real news.
Intent Recognition: Classifies user input as a question, statement, or command.
Review Classification: Evaluates reviews as positive, negative, or neutral.
Hate Speech Detection: Flags hateful or offensive speech.
Subjectivity Analysis: Determines whether a statement is subjective or objective.
Topic Classification: Categorizes text into predefined topics.
The snippet below implements each of these applications with the Hugging Face zero-shot-classification pipeline, which, unlike a plain text-classification pipeline, accepts arbitrary candidate labels at inference time:
from transformers import pipeline

# Initialize a zero-shot classification pipeline; candidate labels
# are supplied per call rather than fixed by the model
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# 1. Sentiment Analysis
def sentiment_analysis(text):
    categories = ["positive", "negative", "neutral"]
    return classifier(text, candidate_labels=categories)

# 2. Spam Detection
def spam_detection(text):
    categories = ["spam", "not spam"]
    return classifier(text, candidate_labels=categories)

# 3. Emotion Detection
def emotion_detection(text):
    categories = ["joy", "anger", "sadness", "fear", "surprise"]
    return classifier(text, candidate_labels=categories)

# 4. Toxic Comment Classification
def toxic_comment_detection(text):
    categories = ["toxic", "non-toxic"]
    return classifier(text, candidate_labels=categories)

# 5. Fake News Detection
def fake_news_detection(text):
    categories = ["fake", "real"]
    return classifier(text, candidate_labels=categories)

# 6. Intent Recognition
def intent_recognition(text):
    categories = ["question", "statement", "command"]
    return classifier(text, candidate_labels=categories)

# 7. Review Classification (Positive, Negative, Neutral)
def review_classification(text):
    categories = ["positive", "negative", "neutral"]
    return classifier(text, candidate_labels=categories)

# 8. Hate Speech Detection
def hate_speech_detection(text):
    categories = ["hate speech", "non-hate speech"]
    return classifier(text, candidate_labels=categories)

# 9. Subjectivity Analysis
def subjectivity_analysis(text):
    categories = ["subjective", "objective"]
    return classifier(text, candidate_labels=categories)

# 10. Topic Classification
def topic_classification(text):
    categories = ["technology", "health", "finance", "sports", "entertainment"]
    return classifier(text, candidate_labels=categories)

# Example Usage
if __name__ == "__main__":
    sample_text = "The new smartphone has amazing features and performance."
    print("1. Sentiment Analysis:", sentiment_analysis(sample_text))
    print("2. Spam Detection:", spam_detection("You won $1000! Click here to claim your prize."))
    print("3. Emotion Detection:", emotion_detection("I am so happy today!"))
    print("4. Toxic Comment Classification:", toxic_comment_detection("You are terrible at this game."))
    print("5. Fake News Detection:", fake_news_detection("The earth is flat."))
    print("6. Intent Recognition:", intent_recognition("What is the weather today?"))
    print("7. Review Classification:", review_classification("The movie was decent, but it could have been better."))
    print("8. Hate Speech Detection:", hate_speech_detection("I hate you and your ideas!"))
    print("9. Subjectivity Analysis:", subjectivity_analysis("In my opinion, this is the best book ever."))
    print("10. Topic Classification:", topic_classification("Artificial intelligence is transforming technology."))
Code Implementation for Automatic Blacklist Detection
Below is an example pipeline that automatically detects potentially harmful words or phrases using spaCy and a pre-trained toxicity detection model:
pip install spacy transformers
python -m spacy download en_core_web_sm
import spacy
from spacy.language import Language
from transformers import pipeline

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Load a pre-trained transformer model for toxicity detection
spam_detector = pipeline("text-classification", model="unitary/toxic-bert")

# Register a custom pipeline component for automatic blacklist detection
@Language.component("blacklist_detector")
def blacklist_detector(doc):
    flagged = []
    # Classify each token in the document
    for token in doc:
        prediction = spam_detector(token.text)
        label = prediction[0]["label"]
        score = prediction[0]["score"]
        # Flag words classified as toxic with high confidence
        if label.lower() == "toxic" and score > 0.8:  # adjust threshold as needed
            flagged.append(token.text)
    if flagged:
        print(f"Blacklisted words detected: {', '.join(flagged)}")
    return doc

# Add the blacklist detector to the end of the pipeline
nlp.add_pipe("blacklist_detector", last=True)

# Test the pipeline
text = "This is a fake email and it might be a scam. Stop spamming!"
doc = nlp(text)
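Running the transformer once per token is slow for long documents. As a minimal sketch under the same setup, a transformers pipeline also accepts a list of strings, so every token can be classified in one batched call instead:
tokens = [token.text for token in doc]
# The pipeline classifies a list of inputs in a single batch,
# returning one prediction dict per input string
predictions = spam_detector(tokens)
flagged = [tok for tok, pred in zip(tokens, predictions)
           if pred["label"].lower() == "toxic" and pred["score"] > 0.8]
print(f"Blacklisted words detected: {', '.join(flagged)}")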
Zero-Shot Classification with facebook/bart-large-mnli
The full script below applies the zero-shot-classification pipeline to a broader set of applications, adding customer feedback analysis, urgency detection, and product recommendation:
from transformers import pipeline

# Initialize the zero-shot classification pipeline
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Define applications
def sentiment_analysis(text):
    labels = ["positive", "negative", "neutral"]
    return classifier(text, candidate_labels=labels)

def spam_detection(text):
    labels = ["spam", "not spam"]
    return classifier(text, candidate_labels=labels)

def emotion_detection(text):
    labels = ["joy", "anger", "sadness", "fear", "love", "surprise"]
    return classifier(text, candidate_labels=labels)

def intent_classification(text):
    labels = ["informative", "query", "request", "command"]
    return classifier(text, candidate_labels=labels)

def topic_classification(text):
    labels = ["technology", "health", "finance", "education", "entertainment", "sports"]
    return classifier(text, candidate_labels=labels)

def customer_feedback_analysis(text):
    labels = ["product complaint", "service complaint", "positive feedback", "neutral feedback"]
    return classifier(text, candidate_labels=labels)

def hate_speech_detection(text):
    labels = ["hate speech", "non-hate speech"]
    return classifier(text, candidate_labels=labels)

def urgency_detection(text):
    labels = ["urgent", "non-urgent"]
    return classifier(text, candidate_labels=labels)

def fake_news_detection(text):
    labels = ["fake news", "real news"]
    return classifier(text, candidate_labels=labels)

def product_recommendation(text):
    labels = ["electronics", "fashion", "books", "home appliances", "sports equipment"]
    return classifier(text, candidate_labels=labels)

# Example usage
if __name__ == "__main__":
    sample_text = "I love this product, but the delivery was delayed."
    print("1. Sentiment Analysis:", sentiment_analysis(sample_text))
    print("2. Spam Detection:", spam_detection("Win $1000 now! Click here."))
    print("3. Emotion Detection:", emotion_detection("I am so happy and excited today!"))
    print("4. Intent Classification:", intent_classification("Can you help me reset my password?"))
    print("5. Topic Classification:", topic_classification("Artificial intelligence is transforming technology."))
    print("6. Customer Feedback Analysis:", customer_feedback_analysis(sample_text))
    print("7. Hate Speech Detection:", hate_speech_detection("I hate everyone in this group."))
    print("8. Urgency Detection:", urgency_detection("Please respond as soon as possible!"))
    print("9. Fake News Detection:", fake_news_detection("The earth is flat."))
    print("10. Product Recommendation:", product_recommendation("Looking for the best smartphone under $500."))
Use a Pre-Trained Model for Offensive Language Detection
The profanity filter below replaces any token that unitary/toxic-bert classifies as toxic with asterisks:
import spacy
from spacy.language import Language
from spacy.tokens import Doc
from transformers import pipeline

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Load a pre-trained model for offensive language detection
offensive_detector = pipeline("text-classification", model="unitary/toxic-bert")

# Register a custom extension to hold the censored text
Doc.set_extension("censored_text", default=None)

@Language.component("profanity_filter")
def profanity_filter(doc):
    tokens = []
    for token in doc:
        # Use the pre-trained model to classify the token
        prediction = offensive_detector(token.text)
        label = prediction[0]["label"]
        score = prediction[0]["score"]
        # Replace the token if classified as offensive with high confidence
        if label.lower() == "toxic" and score > 0.8:  # adjust the threshold as needed
            tokens.append("***")
        else:
            tokens.append(token.text)
    # Store the censored text in the custom extension
    doc._.censored_text = " ".join(tokens)
    return doc

# Add the profanity filter to the end of the pipeline
nlp.add_pipe("profanity_filter", last=True)

# Test the pipeline
text = "This is a really badword and offensive content."
doc = nlp(text)
print(f"Censored Text: {doc._.censored_text}")