def clean_up(s):
    """Return `s` reduced to lowercase ASCII letters, digits and underscores.

    Characters other than a-z, A-Z, 0-9 and the space character are
    dropped; each space becomes an underscore; the result is lowercased.
    """
    new_s = ""
    for x in s:
        # Only characters on this explicit whitelist survive.
        if x in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 ":
            if x == " ":
                new_s = new_s + "_"
            else:
                new_s = new_s + x
    return new_s.lower()
Comments on HW2 - String Clean Up
Fundamentals of Data Science
Python
4 Sample Solutions
def clean_string(input_string):
    """Strip non-alphanumeric characters, lowercase, and underscore spaces.

    Uses `str.isalnum`, so non-ASCII letters and digits are kept as well
    as ASCII ones. Only the plain space character is kept as whitespace.
    """
    # Remove characters that are not numbers, letters, or spaces
    cleaned_string = "".join(
        char for char in input_string if char.isalnum() or char == " "
    )
    # Convert to lowercase
    cleaned_string = cleaned_string.lower()
    # Convert spaces to underscores
    cleaned_string = cleaned_string.replace(" ", "_")

    return cleaned_string
def editing_2(x):
    """Keep alphanumeric and whitespace characters, lowercase, underscore spaces.

    Note: `str.isspace` accepts every whitespace character (tabs, newlines,
    ...), but only the plain space " " is converted to an underscore, so
    other whitespace passes through unchanged.
    """
    text_2 = ""
    for char in x:
        # isalnum() already covers every alphabetic character, so no
        # separate isalpha() test is needed.
        if char.isalnum() or char.isspace():
            text_2 = text_2 + char
    text_2 = text_2.lower()
    text_2 = text_2.replace(" ", "_")
    return text_2
import re
def p4(s):
    """Clean `s` with a regex: keep ASCII letters, digits and spaces only.

    Every other character is deleted, spaces become underscores, and the
    result is lowercased.
    """
    # Negated character class: delete anything that is not a-z, A-Z, 0-9 or " ".
    s = re.sub(r"[^a-zA-Z0-9 ]", "", s)
    s = s.replace(" ", "_")
    return s.lower()
R
#' Clean a single string by filtering its characters against a whitelist.
#'
#' Splits `s` into individual characters, keeps only ASCII letters, digits
#' and spaces, turns spaces into underscores, lowercases, and glues the
#' characters back together.
#'
#' @param s A character string (only the first element is processed).
#' @return A single cleaned character string.
clean_up <- function(s) {
  valid <- c(letters, LETTERS, " ", seq(0, 9)) # a vector of the things I want to keep
  split_string <- strsplit(s, "")[[1]]
  keepers <- split_string[split_string %in% valid]
  keepers <- tolower(gsub(" ", "_", keepers))
  paste(keepers, collapse = "")
}
#' Clean a string by deleting every character outside a whitelist.
#'
#' Builds a negated regex character class from the allowed characters
#' (ASCII letters, digits and the space), removes everything that matches
#' it, lowercases the result and replaces spaces with underscores.
#'
#' @param string A character vector to clean.
#' @return The cleaned character vector, same length as `string`.
string_edit <- function(string) {
  acceptable_characters <- c(letters, LETTERS, as.character(seq(0, 9)), " ")
  # One pattern of the form "[^abc...XYZ0123456789 ]": any character that is
  # NOT in the acceptable set.
  unacceptable_characters <- paste0(
    "[^", paste0(acceptable_characters, collapse = ""), "]"
  )

  # The pattern is a single string, so one gsub() call is enough; no loop
  # over patterns is needed.
  cleaned_string <- gsub(unacceptable_characters, "", string)
  cleaned_string <- tolower(cleaned_string)
  gsub(" ", "_", cleaned_string)
}
#' Clean a string using the POSIX `[:alnum:]` character class.
#'
#' Deletes every character that is neither alphanumeric nor a space,
#' replaces spaces with underscores, and lowercases the result.
#' NOTE(review): what `[:alnum:]` matches beyond ASCII may depend on the
#' locale; confirm if non-ASCII input matters.
#'
#' @param input A character vector to clean.
#' @return The cleaned character vector, same length as `input`.
i_function <- function(input) {
  a1 <- gsub("[^[:alnum:] ]", "", input)
  a2 <- gsub(" ", "_", a1)
  tolower(a2)
}
#' Remove non-alphanumeric characters, lowercase, and underscore spaces.
#'
#' Keeps ASCII letters, digits and anything in the POSIX `[:space:]` class.
#' Note that `[:space:]` covers more than the plain space (e.g. tabs and
#' newlines), but only the plain space " " is converted to an underscore,
#' so other whitespace passes through unchanged.
#'
#' @param input_string A character vector to clean.
#' @return The cleaned character vector, same length as `input_string`.
clean_and_format_string <- function(input_string) {
  cleaned_string <- gsub("[^0-9a-zA-Z[:space:]]", "", input_string)
  cleaned_string <- tolower(cleaned_string)
  cleaned_string <- gsub(" ", "_", cleaned_string)
  cleaned_string
}