# Hugging Face Spaces app (page-status header from the original scrape removed)
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
| import torch | |
| import random | |
| import re | |
| import warnings | |
| warnings.filterwarnings("ignore") | |
| class SimpleHumanizer: | |
| def __init__(self): | |
| # Load a reliable T5 model for paraphrasing | |
| try: | |
| self.model_name = "Vamsi/T5_Paraphrase_Paws" | |
| self.tokenizer = AutoTokenizer.from_pretrained(self.model_name, use_fast=False) | |
| self.model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name) | |
| print("β Model loaded successfully") | |
| except Exception as e: | |
| print(f"β Error loading model: {e}") | |
| self.tokenizer = None | |
| self.model = None | |
| def add_variations(self, text): | |
| """Add simple variations to make text more natural""" | |
| # Common academic phrase variations | |
| replacements = { | |
| "shows that": ["demonstrates that", "indicates that", "reveals that", "suggests that"], | |
| "results in": ["leads to", "causes", "produces", "generates"], | |
| "due to": ["because of", "owing to", "as a result of", "on account of"], | |
| "in order to": ["to", "so as to", "with the aim of", "for the purpose of"], | |
| "as well as": ["and", "along with", "together with", "in addition to"], | |
| "therefore": ["thus", "hence", "consequently", "as a result"], | |
| "however": ["nevertheless", "nonetheless", "on the other hand", "yet"], | |
| "furthermore": ["moreover", "additionally", "in addition", "what is more"], | |
| "significant": ["notable", "considerable", "substantial", "important"], | |
| "important": ["crucial", "vital", "essential", "key"], | |
| "analyze": ["examine", "investigate", "study", "assess"], | |
| "demonstrate": ["show", "illustrate", "reveal", "display"], | |
| "utilize": ["use", "employ", "apply", "implement"] | |
| } | |
| result = text | |
| for original, alternatives in replacements.items(): | |
| if original in result.lower(): | |
| replacement = random.choice(alternatives) | |
| # Replace with case matching | |
| pattern = re.compile(re.escape(original), re.IGNORECASE) | |
| result = pattern.sub(replacement, result, count=1) | |
| return result | |
| def vary_sentence_structure(self, text): | |
| """Simple sentence structure variations""" | |
| sentences = text.split('.') | |
| varied = [] | |
| for sentence in sentences: | |
| sentence = sentence.strip() | |
| if not sentence: | |
| continue | |
| # Add some variety to sentence starters | |
| if random.random() < 0.3: | |
| starters = ["Notably, ", "Importantly, ", "Significantly, ", "Interestingly, "] | |
| if not any(sentence.startswith(s.strip()) for s in starters): | |
| sentence = random.choice(starters) + sentence.lower() | |
| varied.append(sentence) | |
| return '. '.join(varied) + '.' | |
| def paraphrase_text(self, text): | |
| """Paraphrase using T5 model""" | |
| if not self.model or not self.tokenizer: | |
| return text | |
| try: | |
| # Split long text into chunks | |
| max_length = 400 | |
| if len(text) > max_length: | |
| sentences = text.split('.') | |
| chunks = [] | |
| current_chunk = "" | |
| for sentence in sentences: | |
| if len(current_chunk + sentence) < max_length: | |
| current_chunk += sentence + "." | |
| else: | |
| if current_chunk: | |
| chunks.append(current_chunk.strip()) | |
| current_chunk = sentence + "." | |
| if current_chunk: | |
| chunks.append(current_chunk.strip()) | |
| paraphrased_chunks = [] | |
| for chunk in chunks: | |
| para = self._paraphrase_chunk(chunk) | |
| paraphrased_chunks.append(para) | |
| return " ".join(paraphrased_chunks) | |
| else: | |
| return self._paraphrase_chunk(text) | |
| except Exception as e: | |
| print(f"Paraphrasing error: {e}") | |
| return text | |
| def _paraphrase_chunk(self, text): | |
| """Paraphrase a single chunk""" | |
| try: | |
| # Prepare input | |
| input_text = f"paraphrase: {text}" | |
| input_ids = self.tokenizer.encode( | |
| input_text, | |
| return_tensors="pt", | |
| max_length=512, | |
| truncation=True | |
| ) | |
| # Generate paraphrase | |
| with torch.no_grad(): | |
| outputs = self.model.generate( | |
| input_ids=input_ids, | |
| max_length=min(len(text.split()) + 50, 512), | |
| num_beams=5, | |
| num_return_sequences=1, | |
| temperature=1.3, | |
| top_k=50, | |
| top_p=0.95, | |
| do_sample=True, | |
| early_stopping=True, | |
| repetition_penalty=1.2 | |
| ) | |
| # Decode result | |
| paraphrased = self.tokenizer.decode(outputs[0], skip_special_tokens=True) | |
| # Clean up the result | |
| paraphrased = paraphrased.strip() | |
| if paraphrased and len(paraphrased) > 10: | |
| return paraphrased | |
| else: | |
| return text | |
| except Exception as e: | |
| print(f"Chunk paraphrasing error: {e}") | |
| return text | |
# Initialize a single shared humanizer at import time.
# NOTE: the model download/load happens here, so importing this module
# may take a while on first run.
humanizer = SimpleHumanizer()
def humanize_text(input_text, complexity="Medium"):
    """Humanize text: paraphrase, then optionally vary vocabulary and structure.

    Args:
        input_text: The raw (possibly AI-generated) text to rewrite.
        complexity: "Low" (paraphrase only), "Medium" (adds vocabulary
            variations), or "High" (additionally varies sentence structure).

    Returns:
        The humanized text, or a usage/error message string (this function
        never raises — errors are reported in the returned string).
    """
    if not input_text or not input_text.strip():
        return "Please enter some text to humanize."
    try:
        # Step 1: model-based paraphrase (pass-through if model unavailable).
        result = humanizer.paraphrase_text(input_text)
        # Step 2: layered post-processing based on requested complexity.
        if complexity in ("Medium", "High"):
            result = humanizer.add_variations(result)
        if complexity == "High":
            result = humanizer.vary_sentence_structure(result)
        # Step 3: normalize whitespace and remove spaces before punctuation.
        result = re.sub(r'\s+', ' ', result)
        result = re.sub(r'\s+([.!?,:;])', r'\1', result)
        # Capitalize the first letter of each sentence.
        # (s[0].upper() + s[1:] is already correct for single-character
        # sentences, so no separate length branch is needed.)
        formatted_sentences = [
            sentence.strip()[0].upper() + sentence.strip()[1:]
            for sentence in result.split('. ')
            if sentence.strip()
        ]
        result = '. '.join(formatted_sentences)
        # Ensure the output ends with terminal punctuation.
        if not result.endswith(('.', '!', '?')):
            result += '.'
        return result
    except Exception as e:
        print(f"Humanization error: {e}")
        return f"Error processing text: {str(e)}"
# Create the Gradio interface: two inputs (text + complexity radio),
# one copyable text output, plus two pre-filled examples.
demo = gr.Interface(
    fn=humanize_text,
    inputs=[
        gr.Textbox(
            lines=10,
            placeholder="Paste your AI-generated or robotic text here...",
            label="Input Text",
            info="Enter the text you want to humanize"
        ),
        # Choices map directly onto the complexity levels handled in humanize_text().
        gr.Radio(
            choices=["Low", "Medium", "High"],
            value="Medium",
            label="Humanization Complexity",
            info="Low: Basic paraphrasing | Medium: + Vocabulary variations | High: + Structure changes"
        )
    ],
    outputs=gr.Textbox(
        label="Humanized Output",
        lines=10,
        show_copy_button=True
    ),
    # NOTE(review): title/description contain mojibake ("π€β‘οΈπ¨", "β") from a
    # broken encoding — these are user-visible strings and should be re-entered
    # as the intended emoji/bullets by the author.
    title="π€β‘οΈπ¨ AI Text Humanizer (Simple)",
    description="""
**Transform robotic AI text into natural, human-like writing**
This tool uses advanced paraphrasing techniques to make AI-generated text sound more natural and human-like.
Perfect for academic papers, essays, reports, and any content that needs to pass AI detection tools.
**Features:**
β Advanced T5-based paraphrasing
β Vocabulary diversification
β Sentence structure optimization
β Academic tone preservation
β Natural flow enhancement
""",
    examples=[
        [
            "The implementation of machine learning algorithms in data processing systems demonstrates significant improvements in efficiency and accuracy metrics.",
            "Medium"
        ],
        [
            "Artificial intelligence technologies are increasingly being utilized across various industries to enhance operational capabilities and drive innovation.",
            "High"
        ]
    ],
    theme="soft"
)
# Launch the app when run as a script (Spaces runs this module directly).
if __name__ == "__main__":
    demo.launch(
        share=False,            # no public gradio.live tunnel
        server_name="0.0.0.0",  # bind all interfaces (required in containers)
        server_port=7861,
        debug=True              # verbose errors in the UI; disable for production
    )