#!/net/u/1/f/fdc/kermit/wermit +
#
exit 1 SORRY - AMAZON.COM NO LONGER RESPONDS TO HTTP GETs from non-Web browsers
This script no longer works at all.
When this script was written in 2014, Amazon.com would respond to an HTTP GET
(did you know C-Kermit includes an HTTP client?) for a product URL (such as
http://www.amazon.com/dp/B000FPN8TK/) by sending the product page, if it still
existed, or with an error if the product code (ASIN) had been retired. If the
page was successfully retrieved it could be scanned for "Currently
unavailable". Thus we could easily tell from the Unix command line if a given
product was still being sold, or out of stock, or had simply disappeared.
This script was designed to check all the Amazon ASINs in a given Web page in
one batch, producing a simple list of all the broken or problematic product
links in the web page.
BUT NOW when we send an HTTP GET from this script to Amazon.com, instead of
the desired product page we get a page that says "Sorry, we just need to make
sure you're not a robot... Type the characters you see in this image..." The
comments in the page source say:
"To discuss automated access to Amazon data please contact
api-services-support@amazon.com. For information about migrating to our APIs
refer to our Marketplace APIs at
https://developer.amazonservices.com/ref=rm_c_sv, or our Product Advertising
API at
https://affiliate-program.amazon.com/gp/advertising/api/detail/main.html/ref=rm_c_ac
for advertising use cases."
It might be possible to write a Kermit script that creates a Web page that
contains Javascript to query the desired products. See:
https://aws.amazon.com/code/JavaScript
Various other ASIN lookup tools existed in the past but they won't work any
more either.
- fdc, 27 Feb 2017
# Finds stale Amazon.com product links. Scans a web page for links to
# Amazon product pages that include Amazon Standard Identification Numbers
# (ASINs), which are ten-character strings that begin with B00 followed by
# seven more characters that can be decimal digits or ASCII uppercase letters,
# and then checks the availability of the corresponding product in Amazon.com,
# producing a report like this:
#
# OK: B000FPN8TK Adagio Teas Ingenuitea Teapot
# OK: B001F51QQG Gillette GoodNews! Regular Disposable Razor 12 Count
# OK: B002RL9BS2 Wobble Wedges Leveling Shims
# OK: B0026I2HLO Fellowes Powershred W-11C 11 Sheet Cross-Cut Shredder
# OK: B005KD2O0Q Rubbermaid Comm Prod Brownstone Medium Brown Bath Mat
#
# In which the product status is indicated by the first field:
#
# OK: The product is available
# NA: The product is listed at Amazon but is not currently available
# XX: The ASIN is not valid, e.g. when product page has disappeared.
#
# If an Amazon product link includes a title, it is included in the listing,
# as shown above.
#
# Usage:
# checkasins [ options ] filename
#
# Where:
# filename is the name of a local html file containing links to Amazon.
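# options are:
# --errorsonly (show only links to products that are not available)
# --okonly (show only links to products that are available)
# --soldout (stop reading the file at a line containing SOLD OUT)
# --help (print a usage message and exit)
# If no options given, the status of all Amazon links is shown.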
#
# Currently works only for Amazon.com, not Amazon.mx, Amazon.de, nor any of
# the other Amazon national branches. ISBNs are accepted as of the
# 27 April 2014 update. Also assumes there is not more than one product
# per line in the HTML file.
#
# Requires C-Kermit 9.0 or later.
#
# Frank da Cruz, January 2014
# Updated 27 April 2014 to allow for more-complicated Amazon URLs,
# and for ISBNs as well as ASINs.
#
# Temporary location for the Amazon page. The process ID is part of the name
# so that two copies of this script running at once do not overwrite or
# delete each other's download.
.tmpfile := /tmp/__az\v(pid).tmp
# Script name for the usage message. It is captured here at top level
# because inside a macro the variable \%0 holds the name of the macro
# itself, which made the message read "Usage: usage ...".
.myname := \fbasename(\%0)

# USAGE: print a summary of the command-line syntax and options.
define usage {
    echo "Usage: \m(myname) [options] filename-of-webpage"
    echo "Options:"
    echo " --errorsonly (only show ASINs of unavailable products)"
    echo " --okonly (only show ASINs of available products)"
    echo " --soldout (stop reading file at line containing SOLD OUT)"
    echo " --help (print this message)"
}

# At least one command-line argument is required.
if < \v(argc) 2 {
    usage
    exit 1
}
# Ctrl-C trap: release whatever the script is holding, then leave.
define ON_CTRLC {
    fclose all                          # Close every open file
    if exist \m(tmpfile) {              # Remove a leftover temporary file
        delete /quiet \m(tmpfile)
    }
    http close                          # Drop the HTTP connection
    exit
}
# Command-line option flags (0 = off, 1 = on).
.errorsonly = 0
.okonly = 0
.soldout = 0

# Consume leading arguments that start with a hyphen. Each option is accepted
# with one or two leading hyphens. The two report filters are mutually
# exclusive, so setting one clears the other.
while match \%1 -* {
    switch \%1 {
      :--errorsonly
      :-errorsonly
        .errorsonly = 1
        .okonly = 0
        break
      :--okonly
      :-okonly
        .okonly = 1
        .errorsonly = 0
        break
      :--soldout
      :-soldout
        .soldout = 1                    # Stop at a line containing SOLD OUT
        break
      :--help
      :-help
        usage
        exit 0
      :default
        exit 1 \%1: Invalid command-line option
    }
    shift                               # Move on to the next argument
}
# CHECKASIN asin title
# Requests the product page for the given ASIN over the already-open HTTP
# connection, classifies the result, bumps the matching counter and prints
# a report line unless the active filter option suppresses it.
# Result codes: 0 = page fetched, 1 = page says "Currently unavailable",
# 2 = the HTTP GET failed.
define CHECKASIN {
    .rc = 0                             # Start by assuming it is available
    http get dp/\%1 \m(tmpfile)         # Download the product page
    if success {                        # Got a page: look for the marker
        grep /quiet "Currently unavailable" \m(tmpfile)
        if success .rc = 1
    } else {                            # Request failed: page not found
        .rc = 2
    }
    if exist \m(tmpfile) {              # The download is no longer needed
        delete /quiet \m(tmpfile)
    }
    switch \m(rc) {                     # Count it and report status, ASIN, title
      :0
        increment oks
        if not errorsonly end 0 OK: \%1 \%2
        break
      :1
        increment nas
        if not okonly end 0 NA: \%1 \%2
        break
      :2
        increment xxs
        if not okonly end 0 XX: \%1 \%2
        break
    }
}
# Initialize counters
.lines = 0                              # Lines read from web page
.urls = 0                               # Amazon URLs (links) found
.oks = 0                                # Amazon products available
.nas = 0                                # Amazon products not available
.xxs = 0                                # Amazon product pages not found

# The option loop may have consumed every argument. Without this check an
# empty filename would be handed to the file tests below.
if not defined \%1 {
    usage
    exit 1
}

.webpage := \fcontents(\%1)             # Name of web page from command line
if directory \m(webpage) exit 1 "Error: \m(webpage) is a directory"
if not exist \m(webpage) exit 1 "Error: \m(webpage) not found"
if not readable \m(webpage) exit 1 "Error: \m(webpage) not readable"

fopen /read \%c \m(webpage)             # Open the desired web page file
if fail exit 1 "Error: cannot open \m(webpage)"

set quiet on                            # Suppress HTTP messages
http open www.amazon.com 80             # Open HTTP connection to Amazon.com
if fail {                               # Do not leave the input file open
    fclose all
    exit 1 "Error: cannot open HTTP connection to www.amazon.com"
}
# Main loop: read the web page a line at a time, collect each anchor (which
# may span several lines), and check every Amazon product link found.
# State 0 = looking for the start of an anchor, 1 = inside an anchor that
# has not ended yet.
.state = 0                              # Initialize state machine
while true {                            # Read the HTML file a line at a time
    fread /line \%c line                # Read a line
    if fail break                       # End of file - done.
    if soldout {                        # Stop reading file upon reaching
        set case on                     # section marked SOLD OUT.
        if \findex(SOLD\32OUT,\m(line)) break
        set case off
    }
    increment lines                     # Count the line
    switch \m(state) {                  # Find ...
      :0                                # Look for start-of-anchor tag
        # NOTE(review): the next three statements were missing from the file
        # (the text between an opening angle bracket and the following
        # closing one had been stripped, leaving unbalanced braces). They
        # are reconstructed from the surviving fragments and from the way
        # the link variable is used below. Confirm the search pattern
        # against a pristine copy of the script.
        .\%x := \findex(<a\32,\m(line))
        if \%x {                        # This line starts an anchor
            .link := \s(line[\%x])      # Begin accumulating link code
            if > \findex(/a>,\m(line)) \%x { # see if it ends on same line
                .state = 0              # It does - process it below
                break
            }
            .state = 1                  # It doesn't - change state
        }
        continue
      :1                                # Look for end-of-anchor tag
        .link := \m(link) \m(line)      # Accumulate link code
        if \findex(/a>,\m(line)) {      # see if this line ends link
            .state = 0
            break
        }
        continue
    }
    # For efficiency this script is line oriented and assumes that no more
    # than one link can appear on a single line. This could be fixed with
    # some effort.
    # NOTE(review): the count expression and the first part of the message
    # were also stripped from the file. Only the tail of the message
    # survived, so the pattern and the word Warning are reconstructions.
    if > \fcount(<a\32,\m(link)) 1 {
        echo "Warning: multiple <a> tags in \m(link)."
        forward end
    }
    # Make sure it's an Amazon.com URL
    if not \findex(http:,\m(link)) continue
    if not \findex(amazon.com/,\m(link)) continue
    # Have an Amazon link
    incr urls                           # Count this link
    undef title                         # Get the title if any
    .\%x := \findex(title=,\m(link))
    if \%x {                            # Have a title clause
        .title := \s(link[\%x])         # Isolate the text
        .title := \fword(\m(title),2,=,ALL,5) # Look how easy this is
    } else {
        .title := (no title found)
    }
    # Locate the product code. In each case \%x ends up on the slash that
    # immediately precedes the code.
    .\%x = \findex(/B00,\m(link))       # Check for ASIN
    if not \%x {                        # If not found...
        .\%x = \findex(/dp/,\m(link))   # check this way
        if \%x increment \%x 3
    }
    if not \%x {                        # If still not found
        .\%x = \findex(/gp/product/,\m(link)) # check this way
        if \%x increment \%x 11
    }
    if not \%x continue                 # No ASIN - keep going
    .asin := \fword(\s(link[\%x]),1,/?&,ALL,0) # Isolate the ASIN
    # Make sure it's a valid ASIN (B00xxxxxxx, where x = alphanum)
    if \fverify(0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ,\m(asin),3) {
        echo Warning: Suspicious ASIN \m(asin)
    }
    if != \flen(\m(asin)) 10 {
        echo Warning: Bad ASIN \m(asin) (length should be 10)
    }
    checkasin \m(asin) "\m(title)"      # Check the ASIN and report status
    undef asin title                    # Clear these for next time.
}
# Normal completion: display the final values of the five counters.
show macros lines urls oks nas xxs
# FORWARD END lands on the label below, skipping the counter display.
:END
fclose all                              # Close any files still open
http close                              # Close the connection to Amazon
exit
# For EMACS...
; Local Variables:
; comment-column:40
; comment-start:"# "
; End: