This project has moved. For the latest updates, please go here.

Links with anchors point to MSDN (FIX!)

Feb 19, 2012 at 11:46 AM

As other people have noted, a huge number of links in the generated CHM end up pointing to the MSDN website. Since that defeats the whole point of creating a CHM, it makes this tool useless.

I investigated and found that it happens when the link has a URL with an anchor in it, such as http://msdn.microsoft.com/en-us/library/aa511441.aspx#Userscenarios. This is because Link.Resolve() isn't expecting an anchor, and tries to look up the assetId with the anchor still appended.



Here's my revised Link.Resolve() function, which just strips the anchor off of the assetId. There is more work to do, it seems, to get the anchors to actually work. First of all, it seems they don't even exist in the generated output (there are no <a name="foo"> tags). And the links don't have the anchor in them. Is this something to be fixed in the .xslt files? What the heck are those? At least the links take you to the right page anyhow...

 

        /// <summary>
        /// Resolves an href for the transform. An href that starts with "AssetId:"
        /// (matched case-insensitively) is looked up in the "Item" table of the content
        /// data set; any other href is returned unchanged.
        /// </summary>
        /// <param name="href">The link target taken from the document.</param>
        /// <param name="version">Version token placed in the fallback MSDN URL.</param>
        /// <param name="locale">Locale token placed in the fallback MSDN URL.</param>
        /// <param name="returnContentId">
        /// When the asset is found locally: true returns the row's "ContentId" value,
        /// false returns the (lower-cased) asset id.
        /// </param>
        /// <returns>
        /// The original href, a local content/asset id, or an MSDN URL when the asset
        /// is not part of the local content.
        /// </returns>
        public string Resolve(string href, string version, string locale, bool returnContentId)
        {
            const string assetIdPrefix = "assetid:";

            // Ordinal comparison: ToLower() is culture-sensitive and breaks the prefix
            // test under cultures such as tr-TR, where "I" does not lower-case to "i".
            if (string.IsNullOrEmpty(href) ||
                !href.StartsWith(assetIdPrefix, StringComparison.OrdinalIgnoreCase))
            {
                return href;
            }

            string decoded = HttpUtility.UrlDecode(href.Substring(assetIdPrefix.Length));
            string assetId = decoded;
            string anchor = "";

            // Split off "#fragment" so the lookup uses the bare asset id. The fragment
            // keeps its original case because fragment identifiers are case-sensitive.
            int anchorPos = decoded.IndexOf('#');
            if (anchorPos > 0)
            {
                anchor = decoded.Substring(anchorPos);
                assetId = decoded.Substring(0, anchorPos);
            }

            // Lookup keys are lower-cased, as in the original implementation.
            assetId = assetId.ToLowerInvariant();

            DataRow row = contentDataSet.Tables["Item"].Rows.Find(assetId);

            if (row == null)
            {
                // Not available locally: link to the online page instead.
                string target;
                if (!links.TryGetValue(assetId, out target))
                {
                    target = assetId;
                }

                // Added d=ide for a view that hides the TOC.
                // TODO: change from msdn2 when possible.
                // The fragment must come last, after ".aspx", or it swallows the extension.
                return "http://msdn2.microsoft.com/library/" + target + "(" + version + "," +
                    locale + ",d=ide).aspx" + anchor;
            }

            return returnContentId ? row["ContentId"].ToString() : assetId;
        }
Feb 19, 2012 at 12:47 PM

Output to CHM works, including anchors :D

 

Link.cs:

using System;
using System.Collections.Generic;
using System.Data;
using System.Text;
using System.Web;

namespace PackageThis
{
    /// <summary>
    /// Link helper used by the transform to turn "AssetId:" hrefs into local
    /// content references or, when the asset is not available locally, MSDN URLs.
    /// </summary>
    public class Link
    {
        private const string AssetIdPrefix = "assetid:";

        private readonly Content contentDataSet;
        private readonly Dictionary<string, string> links;

        /// <summary>
        /// Creates a resolver over the given content data set and link map.
        /// </summary>
        /// <param name="contentDataSet">Data set whose "Item" table is searched by asset id.</param>
        /// <param name="links">Map from asset id to the target name used in the fallback MSDN URL.</param>
        public Link(Content contentDataSet, Dictionary<string, string> links)
        {
            this.contentDataSet = contentDataSet;
            this.links = links;
        }

        /// <summary>
        /// Returns the fragment part of <paramref name="href"/>, including the leading
        /// '#', or an empty string when the href has no fragment (or is null).
        /// </summary>
        public string Anchor(string href)
        {
            if (href == null)
            {
                return "";
            }

            int anchorPos = href.IndexOf('#');
            return anchorPos < 0 ? "" : href.Substring(anchorPos);
        }

        /// <summary>
        /// Called by the transform to look up an href. An href that starts with
        /// "AssetId:" (matched case-insensitively) is looked up in the "Item" table;
        /// any other href is returned unchanged.
        /// </summary>
        /// <param name="href">The link target taken from the document.</param>
        /// <param name="version">Version token placed in the fallback MSDN URL.</param>
        /// <param name="locale">Locale token placed in the fallback MSDN URL.</param>
        /// <param name="returnContentId">
        /// When the asset is found locally: true returns the row's "ContentId" value
        /// (without the fragment), false returns the lower-cased asset id followed by
        /// the fragment, if any.
        /// </param>
        /// <returns>
        /// The original href, a local content/asset id, or an MSDN URL when the asset
        /// is not part of the local content.
        /// </returns>
        public string Resolve(string href, string version, string locale, bool returnContentId)
        {
            // Ordinal comparison: ToLower() is culture-sensitive and breaks the prefix
            // test under cultures such as tr-TR, where "I" does not lower-case to "i".
            if (string.IsNullOrEmpty(href) ||
                !href.StartsWith(AssetIdPrefix, StringComparison.OrdinalIgnoreCase))
            {
                return href;
            }

            string decoded = HttpUtility.UrlDecode(href.Substring(AssetIdPrefix.Length));
            string assetId = decoded;
            string anchor = "";

            // Split off "#fragment" so the lookup uses the bare asset id. The fragment
            // keeps its original case (as Anchor() does) because fragment identifiers
            // are case-sensitive.
            int anchorPos = decoded.IndexOf('#');
            if (anchorPos > 0)
            {
                anchor = decoded.Substring(anchorPos);
                assetId = decoded.Substring(0, anchorPos);
            }

            // Lookup keys are lower-cased, as in the original implementation.
            assetId = assetId.ToLowerInvariant();

            DataRow row = contentDataSet.Tables["Item"].Rows.Find(assetId);

            if (row == null)
            {
                // Not available locally: link to the online page instead.
                string target;
                if (!links.TryGetValue(assetId, out target))
                {
                    target = assetId;
                }

                // Added d=ide for a view that hides the TOC.
                // TODO: change from msdn2 when possible.
                // The fragment must come last, after ".aspx", or it swallows the extension.
                return "http://msdn2.microsoft.com/library/" + target + "(" + version + "," +
                    locale + ",d=ide).aspx" + anchor;
            }

            if (returnContentId)
            {
                // The content id is returned bare; the caller appends Anchor(href) itself.
                return row["ContentId"].ToString();
            }

            // anchor is "" when the href had no fragment, so this is just the asset id.
            return assetId + anchor;
        }
    }
}

And then in chm.xslt:

<xsl:template match="xhtml:a|xhtml:A|a|A">
    <!-- Rewrites anchor elements for CHM output:
         * named anchors (name/id) are kept and protected from self-closing;
         * "AssetId:" links are resolved through hxLink:Resolve and pointed at the
           local html/<id>.htm page (plus fragment), or at the URL Resolve returned;
         * every other link is passed through with its href unchanged. -->
    <xsl:choose>
      <!-- Some documentation has empty <a name="aName"> tags which end up self-closing, which confuses
    IE and Firefox. The comment within is to prevent a self-closing tag.  -->
      <xsl:when test="@name or @id">
        <a>
          <xsl:apply-templates select="@*" />
          <xsl:apply-templates />
          <xsl:comment>*</xsl:comment>
        </a>
      </xsl:when>

      <xsl:when test="@href">
        <xsl:choose>
          <xsl:when test ="starts-with(@href, 'AssetId:')">
            <xsl:variable name="link">
              <xsl:value-of select="hxLink:Resolve(@href, $version, $locale, true())"/>
            </xsl:variable>

            <a>
              <xsl:apply-templates select="@*" />
              <xsl:attribute name="href">
                <xsl:choose>
                  <!-- Resolve returned an online URL: the asset is not in the local content. -->
                  <xsl:when test="starts-with($link, 'http:')">
                    <xsl:value-of select="$link" />
                  </xsl:when>
                  <!-- Local page: Resolve returned a bare content id, so the fragment
                       is re-attached from the original href. -->
                  <xsl:otherwise>
                    <xsl:text>html/</xsl:text>
                    <xsl:value-of select="$link" />
                    <xsl:text>.htm</xsl:text>
                    <xsl:value-of select="hxLink:Anchor(@href)" />
                  </xsl:otherwise>
                </xsl:choose>
              </xsl:attribute>
              <xsl:apply-templates/>
            </a>

          </xsl:when>

          <!-- In-page (#...), http:, https:, mailto:, relative and any other links are
               emitted as-is. Without this fallback such links, and their text, would
               be dropped from the output. -->
          <xsl:otherwise>
            <a>
              <xsl:apply-templates select="@*" />
              <xsl:attribute name="href">
                <xsl:value-of select="@href" />
              </xsl:attribute>
              <xsl:apply-templates/>
            </a>
          </xsl:otherwise>

        </xsl:choose>
      </xsl:when>

      <!-- An <a> with no name, id or href: keep the element and its content rather
           than silently discarding the text. -->
      <xsl:otherwise>
        <a>
          <xsl:apply-templates select="@*" />
          <xsl:apply-templates />
        </a>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>