#!/net/u/1/f/fdc/kermit/wermit +
#
exit 1 SORRY - AMAZON.COM NO LONGER RESPONDS TO HTTP GETs from non-Web browsers

This script no longer works at all.  When this script was written in 2014,
Amazon.com would respond to an HTTP GET (did you know C-Kermit includes an
HTTP client?) for a product URL (such as http://www.amazon.com/dp/B000FPN8TK/)
by sending the product page, if it still existed, or with an error if the
product code (ASIN) had been retired.  If the page was successfully retrieved
it could be scanned for "Currently unavailable".  Thus we could easily tell
from the Unix command line if a given product was still being sold, or out of
stock, or had simply disappeared.  This script was designed to check all the
Amazon ASINs in a given Web page in one batch, producing a simple list of all
the broken or problematic product links in the web page.

BUT NOW when we send an HTTP GET from this script to Amazon.com, instead of
the desired product page we get a page that says "Sorry, we just need to make
sure you're not a robot...  Type the characters you see in this image..."
The comments in the page source say:

  "To discuss automated access to Amazon data please contact
  api-services-support@amazon.com.  For information about migrating to our
  APIs refer to our Marketplace APIs at
  https://developer.amazonservices.com/ref=rm_c_sv, or our Product
  Advertising API at
  https://affiliate-program.amazon.com/gp/advertising/api/detail/main.html/ref=rm_c_ac
  for advertising use cases."

It might be possible to write a Kermit script that creates a Web page that
contains Javascript to query the desired products.  See:

  https://aws.amazon.com/code/JavaScript

Various other ASIN lookup tools existed in the past but they won't work any
more either.

- fdc, 27 Feb 2017

# Finds stale Amazon.com product links.  Scans a web page for links to
# Amazon product pages that include Amazon Standard Identification Numbers
# (ASINs), which are ten-character strings that begin with B00 followed by
# seven more characters that can be decimal digits or ASCII uppercase letters,
# and then checks the availability of the corresponding product in Amazon.com,
# producing a report like this:
#
#   OK: B000FPN8TK Adagio Teas Ingenuitea Teapot
#   OK: B001F51QQG Gillette GoodNews! Regular Disposable Razor 12 Count
#   OK: B002RL9BS2 Wobble Wedges Leveling Shims
#   OK: B0026I2HLO Fellowes Powershred W-11C 11 Sheet Cross-Cut Shredder
#   OK: B005KD2O0Q Rubbermaid Comm Prod Brownstone Medium Brown Bath Mat
#
# In which the product status is indicated by the first field:
#
#   OK: The product is available
#   NA: The product is listed at Amazon but is not currently available
#   XX: The ASIN is not valid, e.g. when product page has disappeared.
#
# If an Amazon product link includes a title, it is included in the listing,
# as shown above.
#
# Usage:
#   checkasins [ options ] filename
#
# Where:
#   filename is the name of a local html file containing links to Amazon.
#   options are:
#    --errorsonly (show only links to products that are not available)
#    --okonly (show only links to products that are available)
#    --soldout (stop reading file at line containing SOLD OUT)
#    --help (print a usage message)
#   If no options given, the status of all Amazon links is shown.
#
# Currently works only for Amazon.com, not Amazon.mx, Amazon.de, nor any of
# the other Amazon national branches, nor does it work for ISBNs.  Also
# assumes there is not more than one product per line in the HTML file.
#
# Requires C-Kermit 9.0 or later.
#
# Frank da Cruz, January 2014
# Updated 27 April 2014 to allow for more-complicated Amazon URLs,
# and for ISBNs as well as ASINs.
#
# Temporary location for each downloaded Amazon page.  The process ID is
# part of the name so that concurrent runs do not overwrite each other's file.
.tmpfile := /tmp/__az\v(pid).tmp

# USAGE: print the command-line synopsis and the list of options.
def usage {
    echo "Usage: \fbasename(\%0) [options] filename-of-webpage"
    echo "Options:"
    echo " --errorsonly (only show ASINs of unavailable products)"
    echo " --okonly (only show ASINs of available products)"
    echo " --soldout (stop reading file at line containing SOLD OUT)"
    echo " --help (print this message)"
}
if < \v(argc) 2 { usage, exit 1 }       # Need at least one argument

# ON_CTRLC: interrupt trap - release files, temp file, and connection.
define ON_CTRLC {                       # Ctrl-C trap
    fclose all                          # Close all files
    if exist \m(tmpfile) delete /quiet \m(tmpfile) # Delete any temp file
    http close
    exit
}
.errorsonly = 0                         # Command-line options
.okonly = 0
.soldout = 0

# Options may be spelled with one or two leading hyphens.  --errorsonly and
# --okonly are mutually exclusive: whichever is given last wins.
while match \%1 -* {                    # Scan command line words
    switch \%1 {
      :--errorsonly
      :-errorsonly, .errorsonly = 1, .okonly = 0, break
      :--okonly
      :-okonly, .okonly = 1, .errorsonly = 0, break
      :--soldout, .soldout = 1, break   # Stop reading upon encountering
      :-soldout, .soldout = 1, break    # a line that contains "SOLD OUT"
      :--help
      :-help, usage, exit 0
      :default, exit 1 \%1: Invalid command-line option
    }
    shift                               # Get next command-line argument
}
# After the options have been shifted away a filename must remain;
# without this check "checkasins --okonly" would run with an empty name.
if not defined \%1 { usage, exit 1 }

# CHECKASIN: ask Amazon.com about one product and report its status.
#   \%1 = ASIN, \%2 = product title (used only in the report line).
# Increments one of the counters oks / nas / xxs and prints a line
# beginning with OK:, NA:, or XX: unless suppressed by okonly/errorsonly.
define CHECKASIN {                      # Macro to ask Amazon.com about ASIN
    .rc = 0                             # Assume product is available
    http get dp/\%1 \m(tmpfile)         # Get the product page
    if fail {                           # 404 - Page not found
        .rc = 2
    } else {                            # Have page - scan it
        grep /quiet "Currently unavailable" \m(tmpfile)
        if success .rc = 1
    }
    if exist \m(tmpfile) delete /quiet \m(tmpfile) # Delete temporary file
    switch \m(rc) {                     # Report status, ASIN, and title
      :0, incr oks, if not errorsonly end 0 OK: \%1 \%2, break
      :1, incr nas, if not okonly end 0 NA: \%1 \%2, break
      :2, incr xxs, if not okonly end 0 XX: \%1 \%2, break
    }
}

# Initialize counters
.lines = 0                              # Lines read from web page
.urls = 0                               # Amazon URLs (links) found
.oks = 0                                # Amazon products available
.nas = 0                                # Amazon products not available
.xxs = 0                                # Amazon product pages not found

.webpage := \fcontents(\%1)             # Name of web page from command line
if directory \m(webpage) exit 1 "Error: \m(webpage) is a directory"
if not exist \m(webpage) exit 1 "Error: \m(webpage) not found"
if not readable \m(webpage) exit 1 "Error: \m(webpage) not readable"
fopen /read \%c \m(webpage)             # Open the desired web page file
if fail exit 1 "Error: \m(webpage) could not be opened"

set quiet on                            # Suppress HTTP messages
http open www.amazon.com 80             # Open HTTP connection to Amazon.com
if fail exit 1 "Error: HTTP connection to www.amazon.com failed"

# The loop below is a two-state machine:
#   state 0 = looking for the start of an anchor tag,
#   state 1 = inside an anchor that spans lines, accumulating text in "link"
#             until a line containing "/a>" is read.
# A "break" out of the switch means "link" holds a complete anchor that is
# to be processed by the code following the switch; "continue" reads the
# next line.
.state = 0                              # Initialize state machine
while true {                            # Read the HTML file a line at a time
    fread /line \%c line                # Read a line
    if fail break                       # End of file - done.
    if soldout {                        # Stop reading file upon reaching
        set case on                     # section marked SOLD OUT.
        if \findex(SOLD\32OUT,\m(line)) break
        set case off
    }
    increment lines                     # Count the line
    switch \m(state) {                  # Find <a ...>...</a>
      :0
        # NOTE(review): the text between "\findex(" and "\findex(/a>" was
        # lost from this copy of the script (it looks like HTML tags were
        # stripped from it).  The next three statements are reconstructed
        # from the surviving tokens and comments; the exact search pattern
        # should be confirmed against the original script.
        .\%x := \findex(<a\32href=,\m(line)) # Look for start of anchor tag
        if \%x {                        # Found one
            .link := \s(line[\%x])      # Keep the line from the tag onward
            if > \findex(/a>,\m(line)) \%x { # see if it ends on same line
                .state = 0              # It does - process it below
                break
            }
            .state = 1                  # It doesn't - change state
        }
        continue
      :1                                # Look for end-of-anchor tag
        .link := \m(link) \m(line)      # Accumulate link code
        if \findex(/a>,\m(line)) {      # see if this line ends link
            .state = 0
            break
        }
        continue
    }
# For efficiency this script is line oriented and assumes that no more than
# one link can appear on a single line.  This could be fixed with some effort.

    # NOTE(review): the middle of this statement was also lost; the pattern,
    # the limit of 1, and the first half of the message are reconstructed.
    # Only the text: tags in \m(link)." forward end } survived.
    if > \fcount(<a\32href=,\m(link)) 1 {
        echo "Error: Multiple <a href> tags in \m(link)."
        forward end
    }
    # Make sure it's an Amazon.com URL
    if not \findex(http:,\m(link)) continue
    if not \findex(amazon.com/,\m(link)) continue

    # Have an Amazon link
    incr urls                           # Count this link
    undef title                         # Get the title if any
    .\%x := \findex(title=,\m(link))
    if \%x {                            # Have a title clause
        .title := \s(link[\%x])         # Isolate the text
        .title := \fword(\m(title),2,=,ALL,5) # Look how easy this is
    } else {
        .title := (no title found)
    }
    # Locate the ASIN.  In each case \%x is left pointing at the slash that
    # immediately precedes the ASIN; the \fword() call below treats "/" as a
    # separator, so the leading slash is skipped.
    .\%x := \findex(/B00,\m(link))      # Check for ASIN
    if not \%x {                        # If not found...
        .\%x := \findex(/dp/,\m(link))  # check this way
        if \%x increment \%x 3
    }
    if not \%x {                        # If still not found
        .\%x := \findex(/gp/product/,\m(link)) # check this way
        if \%x increment \%x 11
    }
    if not \%x continue                 # No ASIN - keep going
    .asin := \fword(\s(link[\%x]),1,/?&,ALL,0) # Isolate the ASIN

    # Make sure it's a valid ASIN (B00xxxxxxx, where x = alphanum).
    # These are warnings only; the ASIN is still looked up below.
    if \fverify(0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ,\m(asin),3) {
        echo Warning: Suspicious ASIN \m(asin)
    }
    if != \flen(\m(asin)) 10 {
        echo Warning: Bad ASIN \m(asin) (length should be 10)
    }
    checkasin \m(asin) "\m(title)"      # Check the ASIN and report status
    undef asin title                    # Clear these for next time.
}
show mac lines urls oks nas xxs         # Show counters

:END
fclose all                              # Close files and connection
http close
exit

# For EMACS...
; Local Variables:
; comment-column:40
; comment-start:"# "
; End: