require 'rss/maker' require 'open-uri' require 'iconv' require 'cgi' class String def esTitular !self.index('
') && !self[/([0-1][0-9]|2[0-3])(:|.)[0-5][0-9]/] end def esUltimaHora !self.index('
') && self[/([0-1][0-9]|2[0-3])(:|.)[0-5][0-9]/] end def obtenEnlace pos = self.index('/prontus4_nots') posIni = pos-1; stringStart = self[posIni,1] posEnd = self.index(stringStart,pos) newsLink = self[ posIni+1, posEnd - posIni - 1] newsLink end def obtenTitular posIni = self.index('>') + 1 posEnd = self.index('<',posIni) newsTitle = CGI::unescapeHTML( self[ posIni, posEnd-posIni ] ) newsTitle end def obtenDescripcion posIni = self.index('>') + 1 posEnd = self.index('<',posIni) newsDescripcion = CGI::unescapeHTML( self[ posIni, posEnd-posIni ] ) newsDescripcion end end version = "2.0" # ["0.9", "1.0", "2.0"] destination = "/home/vladimirprieto/vladimir.akilles.cl/myrss/estrella_iquique/estrella_iquique.xml" # local file to write #destination = "estrella_iquique.xml" # local file to write destination2 = "estrella_iquique.html" # local file to write wwwBase = 'http://www.estrellaiquique.cl' www = wwwBase + '/matriz/index.html' codigo = "" lines = [] open(www) do |s| codigo = s.read end #codigo = Iconv.iconv("UTF8","ISO8859-1",codigo) lines = codigo.split("\n") #sólo titulares noticias = [] lines.each_index {|i| if lines[i].index('titular') || lines[i].index('bajada') if lines[i+1].index('
') || lines[i+1].index('') || lines[i+2].index('