Applications of Text Classification
Sentiment Analysis: Detects whether the sentiment is positive, negative, or neutral.
Spam Detection: Classifies text as spam or not spam.
Emotion Detection: Identifies emotions such as joy, anger, and sadness.
Toxic Comment Classification: Flags comments as toxic or non-toxic.
Fake News Detection: Distinguishes between fake and real news.
Intent Recognition: Classifies user input as a question, statement, or command.
Review Classification: Evaluates reviews as positive, negative, or neutral.
Hate Speech Detection: Flags hateful or offensive speech.
Subjectivity Analysis: Determines whether a statement is subjective or objective.
Topic Classification: Categorizes text into predefined topics.
The snippet below implements each of these applications with the Hugging Face zero-shot-classification pipeline, which, unlike a plain text-classification pipeline, accepts arbitrary candidate labels at inference time:
from transformers import pipeline

# Initialize a zero-shot classification pipeline; candidate labels
# are supplied per call rather than fixed by the model
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# 1. Sentiment Analysis
def sentiment_analysis(text):
    categories = ["positive", "negative", "neutral"]
    return classifier(text, candidate_labels=categories)

# 2. Spam Detection
def spam_detection(text):
    categories = ["spam", "not spam"]
    return classifier(text, candidate_labels=categories)

# 3. Emotion Detection
def emotion_detection(text):
    categories = ["joy", "anger", "sadness", "fear", "surprise"]
    return classifier(text, candidate_labels=categories)

# 4. Toxic Comment Classification
def toxic_comment_detection(text):
    categories = ["toxic", "non-toxic"]
    return classifier(text, candidate_labels=categories)

# 5. Fake News Detection
def fake_news_detection(text):
    categories = ["fake", "real"]
    return classifier(text, candidate_labels=categories)

# 6. Intent Recognition
def intent_recognition(text):
    categories = ["question", "statement", "command"]
    return classifier(text, candidate_labels=categories)

# 7. Review Classification (Positive, Negative, Neutral)
def review_classification(text):
    categories = ["positive", "negative", "neutral"]
    return classifier(text, candidate_labels=categories)

# 8. Hate Speech Detection
def hate_speech_detection(text):
    categories = ["hate speech", "non-hate speech"]
    return classifier(text, candidate_labels=categories)

# 9. Subjectivity Analysis
def subjectivity_analysis(text):
    categories = ["subjective", "objective"]
    return classifier(text, candidate_labels=categories)

# 10. Topic Classification
def topic_classification(text):
    categories = ["technology", "health", "finance", "sports", "entertainment"]
    return classifier(text, candidate_labels=categories)

# Example Usage
if __name__ == "__main__":
    sample_text = "The new smartphone has amazing features and performance."
    print("1. Sentiment Analysis:", sentiment_analysis(sample_text))
    print("2. Spam Detection:", spam_detection("You won $1000! Click here to claim your prize."))
    print("3. Emotion Detection:", emotion_detection("I am so happy today!"))
    print("4. Toxic Comment Classification:", toxic_comment_detection("You are terrible at this game."))
    print("5. Fake News Detection:", fake_news_detection("The earth is flat."))
    print("6. Intent Recognition:", intent_recognition("What is the weather today?"))
    print("7. Review Classification:", review_classification("The movie was decent, but it could have been better."))
    print("8. Hate Speech Detection:", hate_speech_detection("I hate you and your ideas!"))
    print("9. Subjectivity Analysis:", subjectivity_analysis("In my opinion, this is the best book ever."))
    print("10. Topic Classification:", topic_classification("Artificial intelligence is transforming technology."))
Code Implementation for Automatic Blacklist Detection
Below is an example pipeline that automatically detects potentially harmful words or phrases using spaCy and a pre-trained toxicity detection model:
pip install spacy transformers
python -m spacy download en_core_web_sm
import spacy
from spacy.language import Language
from transformers import pipeline

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Load a pre-trained transformer model for toxicity detection
spam_detector = pipeline("text-classification", model="unitary/toxic-bert")

# Register a custom pipeline component for automatic blacklist detection
@Language.component("blacklist_detector")
def blacklist_detector(doc):
    flagged = []
    # Classify each token in the document
    for token in doc:
        prediction = spam_detector(token.text)
        label = prediction[0]["label"]
        score = prediction[0]["score"]
        # Flag words classified as toxic with high confidence
        if label.lower() == "toxic" and score > 0.8:  # adjust threshold as needed
            flagged.append(token.text)
    if flagged:
        print(f"Blacklisted words detected: {', '.join(flagged)}")
    return doc

# Add the blacklist detector to the end of the pipeline
nlp.add_pipe("blacklist_detector", last=True)

# Test the pipeline
text = "This is a fake email and it might be a scam. Stop spamming!"
doc = nlp(text)
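Running the transformer once per token is slow for long documents. As a minimal sketch under the same setup, a transformers pipeline also accepts a list of strings, so every token can be classified in one batched call instead:
tokens = [token.text for token in doc]
# The pipeline classifies a list of inputs in a single batch,
# returning one prediction dict per input string
predictions = spam_detector(tokens)
flagged = [tok for tok, pred in zip(tokens, predictions)
           if pred["label"].lower() == "toxic" and pred["score"] > 0.8]
print(f"Blacklisted words detected: {', '.join(flagged)}")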
Zero-Shot Classification with facebook/bart-large-mnli
The full script below applies the zero-shot-classification pipeline to a broader set of applications, adding customer feedback analysis, urgency detection, and product recommendation:
from transformers import pipeline

# Initialize the zero-shot classification pipeline
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

# Define applications
def sentiment_analysis(text):
    labels = ["positive", "negative", "neutral"]
    return classifier(text, candidate_labels=labels)

def spam_detection(text):
    labels = ["spam", "not spam"]
    return classifier(text, candidate_labels=labels)

def emotion_detection(text):
    labels = ["joy", "anger", "sadness", "fear", "love", "surprise"]
    return classifier(text, candidate_labels=labels)

def intent_classification(text):
    labels = ["informative", "query", "request", "command"]
    return classifier(text, candidate_labels=labels)

def topic_classification(text):
    labels = ["technology", "health", "finance", "education", "entertainment", "sports"]
    return classifier(text, candidate_labels=labels)

def customer_feedback_analysis(text):
    labels = ["product complaint", "service complaint", "positive feedback", "neutral feedback"]
    return classifier(text, candidate_labels=labels)

def hate_speech_detection(text):
    labels = ["hate speech", "non-hate speech"]
    return classifier(text, candidate_labels=labels)

def urgency_detection(text):
    labels = ["urgent", "non-urgent"]
    return classifier(text, candidate_labels=labels)

def fake_news_detection(text):
    labels = ["fake news", "real news"]
    return classifier(text, candidate_labels=labels)

def product_recommendation(text):
    labels = ["electronics", "fashion", "books", "home appliances", "sports equipment"]
    return classifier(text, candidate_labels=labels)

# Example usage
if __name__ == "__main__":
    sample_text = "I love this product, but the delivery was delayed."
    print("1. Sentiment Analysis:", sentiment_analysis(sample_text))
    print("2. Spam Detection:", spam_detection("Win $1000 now! Click here."))
    print("3. Emotion Detection:", emotion_detection("I am so happy and excited today!"))
    print("4. Intent Classification:", intent_classification("Can you help me reset my password?"))
    print("5. Topic Classification:", topic_classification("Artificial intelligence is transforming technology."))
    print("6. Customer Feedback Analysis:", customer_feedback_analysis(sample_text))
    print("7. Hate Speech Detection:", hate_speech_detection("I hate everyone in this group."))
    print("8. Urgency Detection:", urgency_detection("Please respond as soon as possible!"))
    print("9. Fake News Detection:", fake_news_detection("The earth is flat."))
    print("10. Product Recommendation:", product_recommendation("Looking for the best smartphone under $500."))
Use a Pre-Trained Model for Offensive Language Detection
The profanity filter below replaces any token that unitary/toxic-bert classifies as toxic with asterisks:
import spacy
from spacy.language import Language
from spacy.tokens import Doc
from transformers import pipeline

# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Load a pre-trained model for offensive language detection
offensive_detector = pipeline("text-classification", model="unitary/toxic-bert")

# Register a custom extension to hold the censored text
Doc.set_extension("censored_text", default=None)

@Language.component("profanity_filter")
def profanity_filter(doc):
    tokens = []
    for token in doc:
        # Use the pre-trained model to classify the token
        prediction = offensive_detector(token.text)
        label = prediction[0]["label"]
        score = prediction[0]["score"]
        # Replace the token if classified as offensive with high confidence
        if label.lower() == "toxic" and score > 0.8:  # adjust the threshold as needed
            tokens.append("***")
        else:
            tokens.append(token.text)
    # Store the censored text in the custom extension
    doc._.censored_text = " ".join(tokens)
    return doc

# Add the profanity filter to the end of the pipeline
nlp.add_pipe("profanity_filter", last=True)

# Test the pipeline
text = "This is a really badword and offensive content."
doc = nlp(text)
print(f"Censored Text: {doc._.censored_text}")